diff --git a/.gitignore b/.gitignore index c2ed4ecb0acd..4105cfbd6f26 100644 --- a/.gitignore +++ b/.gitignore @@ -33,6 +33,7 @@ *.lzo *.patch *.gcno +*.ll modules.builtin Module.symvers *.dwo @@ -114,3 +115,6 @@ all.config # Kdevelop4 *.kdev4 + +# fetched Android config fragments +kernel/configs/android-*.cfg diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index a809f6005f14..cecb93af8967 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -57,6 +57,15 @@ Contact: "Jaegeuk Kim" Description: Controls the issue rate of small discard commands. +What: /sys/fs/f2fs//discard_granularity +Date: July 2017 +Contact: "Chao Yu" +Description: + Controls discard granularity of inner discard thread, inner thread + will not issue discards with size that is smaller than granularity. + The unit size is one block, now only support configuring in range + of [1, 512]. + What: /sys/fs/f2fs//max_victim_search Date: January 2014 Contact: "Jaegeuk Kim" @@ -112,3 +121,33 @@ Date: January 2016 Contact: "Shuoran Liu" Description: Shows total written kbytes issued to disk. + +What: /sys/fs/f2fs//inject_rate +Date: May 2016 +Contact: "Sheng Yong" +Description: + Controls the injection rate. + +What: /sys/fs/f2fs//inject_type +Date: May 2016 +Contact: "Sheng Yong" +Description: + Controls the injection type. + +What: /sys/fs/f2fs//reserved_blocks +Date: June 2017 +Contact: "Chao Yu" +Description: + Controls current reserved blocks in system. + +What: /sys/fs/f2fs//gc_urgent +Date: August 2017 +Contact: "Jaegeuk Kim" +Description: + Do background GC agressively + +What: /sys/fs/f2fs//gc_urgent_sleep_time +Date: August 2017 +Contact: "Jaegeuk Kim" +Description: + Controls sleep time of GC urgent mode diff --git a/Documentation/devicetree/bindings/display/bridge/ti,ths8135.txt b/Documentation/devicetree/bindings/display/bridge/ti,ths8135.txt new file mode 100644 index 000000000000..6ec1a880ac18 --- /dev/null +++ b/Documentation/devicetree/bindings/display/bridge/ti,ths8135.txt @@ -0,0 +1,46 @@ +THS8135 Video DAC +----------------- + +This is the binding for Texas Instruments THS8135 Video DAC bridge. + +Required properties: + +- compatible: Must be "ti,ths8135" + +Required nodes: + +This device has two video ports. Their connections are modelled using the OF +graph bindings specified in Documentation/devicetree/bindings/graph.txt. + +- Video port 0 for RGB input +- Video port 1 for VGA output + +Example +------- + +vga-bridge { + compatible = "ti,ths8135"; + #address-cells = <1>; + #size-cells = <0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + vga_bridge_in: endpoint { + remote-endpoint = <&lcdc_out_vga>; + }; + }; + + port@1 { + reg = <1>; + + vga_bridge_out: endpoint { + remote-endpoint = <&vga_con_in>; + }; + }; + }; +}; diff --git a/Documentation/devicetree/bindings/iio/adc/avia-hx711.txt b/Documentation/devicetree/bindings/iio/adc/avia-hx711.txt new file mode 100644 index 000000000000..b3629405f568 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/adc/avia-hx711.txt @@ -0,0 +1,18 @@ +* AVIA HX711 ADC chip for weight cells + Bit-banging driver + +Required properties: + - compatible: Should be "avia,hx711" + - sck-gpios: Definition of the GPIO for the clock + - dout-gpios: Definition of the GPIO for data-out + See Documentation/devicetree/bindings/gpio/gpio.txt + - avdd-supply: Definition of the regulator used as analog supply + +Example: +weight@0 { + compatible = "avia,hx711"; + sck-gpios = <&gpio3 10 GPIO_ACTIVE_HIGH>; + dout-gpios = <&gpio0 7 GPIO_ACTIVE_HIGH>; + avdd-suppy = <&avdd>; +}; + diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index f0a48ea78659..bceffffb7502 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -38,6 +38,7 @@ atmel Atmel Corporation auo AU Optronics Corporation auvidea Auvidea GmbH avago Avago Technologies +avia avia semiconductor avic Shanghai AVIC Optoelectronics Co., Ltd. axis Axis Communications AB boe BOE Technology Group Co., Ltd. diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 753dd4f96afe..e7fb61ee6583 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -157,6 +157,20 @@ data_flush Enable data flushing before checkpoint in order to mode=%s Control block allocation mode which supports "adaptive" and "lfs". In "lfs" mode, there should be no random writes towards main area. +io_bits=%u Set the bit size of write IO requests. It should be set + with "mode=lfs". +usrquota Enable plain user disk quota accounting. +grpquota Enable plain group disk quota accounting. +prjquota Enable plain project quota accounting. +usrjquota= Appoint specified file and type during mount, so that quota +grpjquota= information can be properly updated during recovery flow, +prjjquota= : must be in root directory; +jqfmt= : [vfsold,vfsv0,vfsv1]. +offusrjquota Turn off user journelled quota. +offgrpjquota Turn off group journelled quota. +offprjjquota Turn off project journelled quota. +quota Enable plain user disk quota accounting. +noquota Disable all plain disk quota option. ================================================================================ DEBUGFS ENTRIES @@ -202,6 +216,15 @@ Files in /sys/fs/f2fs/ gc_idle = 1 will select the Cost Benefit approach & setting gc_idle = 2 will select the greedy approach. + gc_urgent This parameter controls triggering background GCs + urgently or not. Setting gc_urgent = 0 [default] + makes back to default behavior, while if it is set + to 1, background thread starts to do GC by given + gc_urgent_sleep_time interval. + + gc_urgent_sleep_time This parameter controls sleep time for gc_urgent. + 500 ms is set by default. See above gc_urgent. + reclaim_segments This parameter controls the number of prefree segments to be reclaimed. If the number of prefree segments is larger than the number of segments diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt index 3849814bfe6d..0e03baf271bd 100644 --- a/Documentation/security/keys.txt +++ b/Documentation/security/keys.txt @@ -1151,8 +1151,21 @@ access the data: usage. This is called key->payload.rcu_data0. The following accessors wrap the RCU calls to this element: - rcu_assign_keypointer(struct key *key, void *data); - void *rcu_dereference_key(struct key *key); + (a) Set or change the first payload pointer: + + rcu_assign_keypointer(struct key *key, void *data); + + (b) Read the first payload pointer with the key semaphore held: + + [const] void *dereference_key_locked([const] struct key *key); + + Note that the return value will inherit its constness from the key + parameter. Static analysis will give an error if it things the lock + isn't held. + + (c) Read the first payload pointer with the RCU read lock held: + + const void *dereference_key_rcu(const struct key *key); =================== diff --git a/Kbuild b/Kbuild index 3d0ae152af7c..94c752762bc2 100644 --- a/Kbuild +++ b/Kbuild @@ -7,31 +7,6 @@ # 4) Check for missing system calls # 5) Generate constants.py (may need bounds.h) -# Default sed regexp - multiline due to syntax constraints -define sed-y - "/^->/{s:->#\(.*\):/* \1 */:; \ - s:^->\([^ ]*\) [\$$#]*\([-0-9]*\) \(.*\):#define \1 \2 /* \3 */:; \ - s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \ - s:->::; p;}" -endef - -# Use filechk to avoid rebuilds when a header changes, but the resulting file -# does not -define filechk_offsets - (set -e; \ - echo "#ifndef $2"; \ - echo "#define $2"; \ - echo "/*"; \ - echo " * DO NOT MODIFY."; \ - echo " *"; \ - echo " * This file was generated by Kbuild"; \ - echo " */"; \ - echo ""; \ - sed -ne $(sed-y); \ - echo ""; \ - echo "#endif" ) -endef - ##### # 1) Generate bounds.h diff --git a/Makefile b/Makefile index b48aebbe187f..9e700c357880 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 51 +SUBLEVEL = 59 EXTRAVERSION = NAME = Roaring Lionus @@ -301,7 +301,7 @@ CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ HOSTCC = gcc HOSTCXX = g++ -HOSTCFLAGS = -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu89 +HOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu89 HOSTCXXFLAGS = -O2 ifeq ($(shell $(HOSTCC) -v 2>&1 | grep -c "clang version"), 1) @@ -639,7 +639,8 @@ KBUILD_CFLAGS += $(call cc-option,-fdata-sections,) endif ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE -KBUILD_CFLAGS += -Os $(call cc-disable-warning,maybe-uninitialized,) +KBUILD_CFLAGS += $(call cc-option,-Oz,-Os) +KBUILD_CFLAGS += $(call cc-disable-warning,maybe-uninitialized,) else ifdef CONFIG_PROFILE_ALL_BRANCHES KBUILD_CFLAGS += -O2 $(call cc-disable-warning,maybe-uninitialized,) @@ -699,11 +700,22 @@ endif KBUILD_CFLAGS += $(stackp-flag) ifeq ($(cc-name),clang) +ifneq ($(CROSS_COMPILE),) +CLANG_TRIPLE ?= $(CROSS_COMPILE) +CLANG_TARGET := --target=$(notdir $(CLANG_TRIPLE:%-=%)) +GCC_TOOLCHAIN := $(realpath $(dir $(shell which $(LD)))/..) +endif +ifneq ($(GCC_TOOLCHAIN),) +CLANG_GCC_TC := --gcc-toolchain=$(GCC_TOOLCHAIN) +endif +KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) +KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) -KBUILD_CPPFLAGS += $(call cc-option,-Wno-unknown-warning-option,) KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable) KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier) KBUILD_CFLAGS += $(call cc-disable-warning, gnu) +KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) +KBUILD_CFLAGS += $(call cc-disable-warning, duplicate-decl-specifier) # Quiet clang warning: comparison of unsigned expression < 0 is always false KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare) # CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the @@ -711,6 +723,8 @@ KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare) # See modpost pattern 2 KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,) KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior) +KBUILD_CFLAGS += $(call cc-option, -no-integrated-as) +KBUILD_AFLAGS += $(call cc-option, -no-integrated-as) else # These warnings generated too much noise in a regular build. @@ -1362,6 +1376,8 @@ help: @echo ' (default: $$(INSTALL_MOD_PATH)/lib/firmware)' @echo ' dir/ - Build all files in dir and below' @echo ' dir/file.[ois] - Build specified target only' + @echo ' dir/file.ll - Build the LLVM assembly file' + @echo ' (requires compiler support for LLVM assembly generation)' @echo ' dir/file.lst - Build specified mixed source/assembly target only' @echo ' (requires a recent binutils and recent build (System.map))' @echo ' dir/file.ko - Build module including final link' @@ -1546,6 +1562,7 @@ clean: $(clean-dirs) -o -name '*.symtypes' -o -name 'modules.order' \ -o -name modules.builtin -o -name '.tmp_*.o.*' \ -o -name '*.c.[012]*.*' \ + -o -name '*.ll' \ -o -name '*.gcno' \) -type f -print | xargs rm -f # Generate tags for editors @@ -1649,6 +1666,8 @@ endif $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) %.symtypes: %.c prepare scripts FORCE $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) +%.ll: %.c prepare scripts FORCE + $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) # Modules /: prepare scripts FORCE diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 1eea99beecc3..85d9ea4a0acc 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -92,6 +92,12 @@ ENTRY(EV_MachineCheck) lr r0, [efa] mov r1, sp + ; hardware auto-disables MMU, re-enable it to allow kernel vaddr + ; access for say stack unwinding of modules for crash dumps + lr r3, [ARC_REG_PID] + or r3, r3, MMU_ENABLE + sr r3, [ARC_REG_PID] + lsr r3, r2, 8 bmsk r3, r3, 7 brne r3, ECR_C_MCHK_DUP_TLB, 1f diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index bdb295e09160..a4dc881da277 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -896,9 +896,6 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, local_irq_save(flags); - /* re-enable the MMU */ - write_aux_reg(ARC_REG_PID, MMU_ENABLE | read_aux_reg(ARC_REG_PID)); - /* loop thru all sets of TLB */ for (set = 0; set < mmu->sets; set++) { diff --git a/arch/arm/Kconfig-nommu b/arch/arm/Kconfig-nommu index aed66d5df7f1..b7576349528c 100644 --- a/arch/arm/Kconfig-nommu +++ b/arch/arm/Kconfig-nommu @@ -34,8 +34,7 @@ config PROCESSOR_ID used instead of the auto-probing which utilizes the register. config REMAP_VECTORS_TO_RAM - bool 'Install vectors to the beginning of RAM' if DRAM_BASE - depends on DRAM_BASE + bool 'Install vectors to the beginning of RAM' help The kernel needs to change the hardware exception vectors. In nommu mode, the hardware exception vectors are normally diff --git a/arch/arm/boot/dts/am335x-chilisom.dtsi b/arch/arm/boot/dts/am335x-chilisom.dtsi index f9ee5859c154..1b43ebd08b38 100644 --- a/arch/arm/boot/dts/am335x-chilisom.dtsi +++ b/arch/arm/boot/dts/am335x-chilisom.dtsi @@ -124,6 +124,14 @@ &rtc { system-power-controller; + + pinctrl-0 = <&ext_wakeup>; + pinctrl-names = "default"; + + ext_wakeup: ext-wakeup { + pins = "ext_wakeup0"; + input-enable; + }; }; /* NAND Flash */ diff --git a/arch/arm/boot/dts/bcm953012k.dts b/arch/arm/boot/dts/bcm953012k.dts index 05a985a20378..6208e85acd9d 100644 --- a/arch/arm/boot/dts/bcm953012k.dts +++ b/arch/arm/boot/dts/bcm953012k.dts @@ -48,7 +48,7 @@ }; memory { - reg = <0x00000000 0x10000000>; + reg = <0x80000000 0x10000000>; }; }; diff --git a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi index 8aa19ba14436..5282d69e55bd 100644 --- a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi +++ b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi @@ -97,11 +97,11 @@ thermal-zones { cpu_thermal: cpu-thermal { cooling-maps { - map0 { + cooling_map0: map0 { /* Corresponds to 800MHz at freq_table */ cooling-device = <&cpu0 7 7>; }; - map1 { + cooling_map1: map1 { /* Corresponds to 200MHz at freq_table */ cooling-device = <&cpu0 13 13>; }; diff --git a/arch/arm/boot/dts/exynos4412-odroidu3.dts b/arch/arm/boot/dts/exynos4412-odroidu3.dts index 99634c54dca9..7504a5aa538e 100644 --- a/arch/arm/boot/dts/exynos4412-odroidu3.dts +++ b/arch/arm/boot/dts/exynos4412-odroidu3.dts @@ -13,6 +13,7 @@ /dts-v1/; #include "exynos4412-odroid-common.dtsi" +#include "exynos4412-prime.dtsi" / { model = "Hardkernel ODROID-U3 board based on Exynos4412"; @@ -47,11 +48,11 @@ cooling-maps { map0 { trip = <&cpu_alert1>; - cooling-device = <&cpu0 7 7>; + cooling-device = <&cpu0 9 9>; }; map1 { trip = <&cpu_alert2>; - cooling-device = <&cpu0 13 13>; + cooling-device = <&cpu0 15 15>; }; map2 { trip = <&cpu_alert0>; diff --git a/arch/arm/boot/dts/exynos4412-odroidx2.dts b/arch/arm/boot/dts/exynos4412-odroidx2.dts index 4d228858f172..d6e92ebc3874 100644 --- a/arch/arm/boot/dts/exynos4412-odroidx2.dts +++ b/arch/arm/boot/dts/exynos4412-odroidx2.dts @@ -12,6 +12,7 @@ */ #include "exynos4412-odroidx.dts" +#include "exynos4412-prime.dtsi" / { model = "Hardkernel ODROID-X2 board based on Exynos4412"; diff --git a/arch/arm/boot/dts/exynos4412-prime.dtsi b/arch/arm/boot/dts/exynos4412-prime.dtsi new file mode 100644 index 000000000000..e75bc170c89c --- /dev/null +++ b/arch/arm/boot/dts/exynos4412-prime.dtsi @@ -0,0 +1,41 @@ +/* + * Samsung's Exynos4412 Prime SoC device tree source + * + * Copyright (c) 2016 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * Exynos4412 Prime SoC revision supports higher CPU frequencies than + * non-Prime version. Therefore we need to update OPPs table and + * thermal maps accordingly. + */ + +&cpu0_opp_1500 { + /delete-property/turbo-mode; +}; + +&cpu0_opp_table { + opp@1600000000 { + opp-hz = /bits/ 64 <1600000000>; + opp-microvolt = <1350000>; + clock-latency-ns = <200000>; + }; + opp@1704000000 { + opp-hz = /bits/ 64 <1704000000>; + opp-microvolt = <1350000>; + clock-latency-ns = <200000>; + }; +}; + +&cooling_map0 { + cooling-device = <&cpu0 9 9>; +}; + +&cooling_map1 { + cooling-device = <&cpu0 15 15>; +}; diff --git a/arch/arm/boot/dts/exynos4412.dtsi b/arch/arm/boot/dts/exynos4412.dtsi index 40beede46e55..3ebdf01d814c 100644 --- a/arch/arm/boot/dts/exynos4412.dtsi +++ b/arch/arm/boot/dts/exynos4412.dtsi @@ -130,7 +130,7 @@ opp-microvolt = <1287500>; clock-latency-ns = <200000>; }; - opp@1500000000 { + cpu0_opp_1500: opp@1500000000 { opp-hz = /bits/ 64 <1500000000>; opp-microvolt = <1350000>; clock-latency-ns = <200000>; diff --git a/arch/arm/boot/dts/mt2701.dtsi b/arch/arm/boot/dts/mt2701.dtsi index 18596a2c58a1..77c6b931dc24 100644 --- a/arch/arm/boot/dts/mt2701.dtsi +++ b/arch/arm/boot/dts/mt2701.dtsi @@ -174,4 +174,40 @@ clocks = <&uart_clk>; status = "disabled"; }; + + mmsys: syscon@14000000 { + compatible = "mediatek,mt2701-mmsys", "syscon"; + reg = <0 0x14000000 0 0x1000>; + #clock-cells = <1>; + }; + + imgsys: syscon@15000000 { + compatible = "mediatek,mt2701-imgsys", "syscon"; + reg = <0 0x15000000 0 0x1000>; + #clock-cells = <1>; + }; + + vdecsys: syscon@16000000 { + compatible = "mediatek,mt2701-vdecsys", "syscon"; + reg = <0 0x16000000 0 0x1000>; + #clock-cells = <1>; + }; + + hifsys: syscon@1a000000 { + compatible = "mediatek,mt2701-hifsys", "syscon"; + reg = <0 0x1a000000 0 0x1000>; + #clock-cells = <1>; + }; + + ethsys: syscon@1b000000 { + compatible = "mediatek,mt2701-ethsys", "syscon"; + reg = <0 0x1b000000 0 0x1000>; + #clock-cells = <1>; + }; + + bdpsys: syscon@1c000000 { + compatible = "mediatek,mt2701-bdpsys", "syscon"; + reg = <0 0x1c000000 0 0x1000>; + #clock-cells = <1>; + }; }; diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi index 351fcc2f87df..b6c6410ca384 100644 --- a/arch/arm/boot/dts/r8a7790.dtsi +++ b/arch/arm/boot/dts/r8a7790.dtsi @@ -1493,7 +1493,8 @@ }; msiof0: spi@e6e20000 { - compatible = "renesas,msiof-r8a7790"; + compatible = "renesas,msiof-r8a7790", + "renesas,rcar-gen2-msiof"; reg = <0 0xe6e20000 0 0x0064>; interrupts = ; clocks = <&mstp0_clks R8A7790_CLK_MSIOF0>; @@ -1507,7 +1508,8 @@ }; msiof1: spi@e6e10000 { - compatible = "renesas,msiof-r8a7790"; + compatible = "renesas,msiof-r8a7790", + "renesas,rcar-gen2-msiof"; reg = <0 0xe6e10000 0 0x0064>; interrupts = ; clocks = <&mstp2_clks R8A7790_CLK_MSIOF1>; @@ -1521,7 +1523,8 @@ }; msiof2: spi@e6e00000 { - compatible = "renesas,msiof-r8a7790"; + compatible = "renesas,msiof-r8a7790", + "renesas,rcar-gen2-msiof"; reg = <0 0xe6e00000 0 0x0064>; interrupts = ; clocks = <&mstp2_clks R8A7790_CLK_MSIOF2>; @@ -1535,7 +1538,8 @@ }; msiof3: spi@e6c90000 { - compatible = "renesas,msiof-r8a7790"; + compatible = "renesas,msiof-r8a7790", + "renesas,rcar-gen2-msiof"; reg = <0 0xe6c90000 0 0x0064>; interrupts = ; clocks = <&mstp2_clks R8A7790_CLK_MSIOF3>; diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index 31dde8b6f2ea..8ba0e2e5ad97 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -335,7 +335,7 @@ static void at91sam9_sdram_standby(void) at91_ramc_write(1, AT91_SDRAMC_LPR, saved_lpr1); } -static const struct of_device_id const ramc_ids[] __initconst = { +static const struct of_device_id ramc_ids[] __initconst = { { .compatible = "atmel,at91rm9200-sdramc", .data = at91rm9200_standby }, { .compatible = "atmel,at91sam9260-sdramc", .data = at91sam9_sdram_standby }, { .compatible = "atmel,at91sam9g45-ddramc", .data = at91_ddr_standby }, diff --git a/arch/arm/mach-bcm/bcm_kona_smc.c b/arch/arm/mach-bcm/bcm_kona_smc.c index cf3f8658f0e5..a55a7ecf146a 100644 --- a/arch/arm/mach-bcm/bcm_kona_smc.c +++ b/arch/arm/mach-bcm/bcm_kona_smc.c @@ -33,7 +33,7 @@ struct bcm_kona_smc_data { unsigned result; }; -static const struct of_device_id const bcm_kona_smc_ids[] __initconst = { +static const struct of_device_id bcm_kona_smc_ids[] __initconst = { {.compatible = "brcm,kona-smc"}, {.compatible = "bcm,kona-smc"}, /* deprecated name */ {}, diff --git a/arch/arm/mach-cns3xxx/core.c b/arch/arm/mach-cns3xxx/core.c index 03da3813f1ab..7d5a44a06648 100644 --- a/arch/arm/mach-cns3xxx/core.c +++ b/arch/arm/mach-cns3xxx/core.c @@ -346,7 +346,7 @@ static struct usb_ohci_pdata cns3xxx_usb_ohci_pdata = { .power_off = csn3xxx_usb_power_off, }; -static const struct of_dev_auxdata const cns3xxx_auxdata[] __initconst = { +static const struct of_dev_auxdata cns3xxx_auxdata[] __initconst = { { "intel,usb-ehci", CNS3XXX_USB_BASE, "ehci-platform", &cns3xxx_usb_ehci_pdata }, { "intel,usb-ohci", CNS3XXX_USB_OHCI_BASE, "ohci-platform", &cns3xxx_usb_ohci_pdata }, { "cavium,cns3420-ahci", CNS3XXX_SATA2_BASE, "ahci", NULL }, diff --git a/arch/arm/mach-omap2/prm_common.c b/arch/arm/mach-omap2/prm_common.c index 5b2f5138d938..f1ca9479491b 100644 --- a/arch/arm/mach-omap2/prm_common.c +++ b/arch/arm/mach-omap2/prm_common.c @@ -713,7 +713,7 @@ static struct omap_prcm_init_data scrm_data __initdata = { }; #endif -static const struct of_device_id const omap_prcm_dt_match_table[] __initconst = { +static const struct of_device_id omap_prcm_dt_match_table[] __initconst = { #ifdef CONFIG_SOC_AM33XX { .compatible = "ti,am3-prcm", .data = &am3_prm_data }, #endif diff --git a/arch/arm/mach-omap2/vc.c b/arch/arm/mach-omap2/vc.c index 2028167fff31..d76b1e5eb8ba 100644 --- a/arch/arm/mach-omap2/vc.c +++ b/arch/arm/mach-omap2/vc.c @@ -559,7 +559,7 @@ struct i2c_init_data { u8 hsscll_12; }; -static const struct i2c_init_data const omap4_i2c_timing_data[] __initconst = { +static const struct i2c_init_data omap4_i2c_timing_data[] __initconst = { { .load = 50, .loadbits = 0x3, diff --git a/arch/arm/mach-spear/time.c b/arch/arm/mach-spear/time.c index 9ccffc1d0f28..aaaa6781b9fe 100644 --- a/arch/arm/mach-spear/time.c +++ b/arch/arm/mach-spear/time.c @@ -204,7 +204,7 @@ static void __init spear_clockevent_init(int irq) setup_irq(irq, &spear_timer_irq); } -static const struct of_device_id const timer_of_match[] __initconst = { +static const struct of_device_id timer_of_match[] __initconst = { { .compatible = "st,spear-timer", }, { }, }; diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index d062f08f5020..4b24964a520a 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -199,6 +199,7 @@ static struct dma_map_ops xen_swiotlb_dma_ops = { .unmap_page = xen_swiotlb_unmap_page, .dma_supported = xen_swiotlb_dma_supported, .set_dma_mask = xen_swiotlb_set_dma_mask, + .mmap = xen_swiotlb_dma_mmap, }; int __init xen_mm_init(void) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 13a64c9b1e4a..f736cda53f6f 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -37,7 +37,14 @@ $(warning LSE atomics not supported by binutils) endif endif -KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) +ifeq ($(cc-name),clang) +# This is a workaround for https://bugs.llvm.org/show_bug.cgi?id=30792. +# TODO: revert when this is fixed in LLVM. +KBUILD_CFLAGS += -mno-implicit-float +else +KBUILD_CFLAGS += -mgeneral-regs-only +endif +KBUILD_CFLAGS += $(lseinstr) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables KBUILD_CFLAGS += $(call cc-option, -mpc-relative-literal-loads) KBUILD_CFLAGS += -fno-pic @@ -77,6 +84,10 @@ else TEXT_OFFSET := 0x00080000 endif +ifeq ($(cc-name),clang) +KBUILD_CFLAGS += $(call cc-disable-warning, asm-operand-widths) +endif + # KASAN_SHADOW_OFFSET = VA_START + (1 << (VA_BITS - 3)) - (1 << 61) # in 32-bit arithmetic KASAN_SHADOW_OFFSET := $(shell printf "0x%08x00000000\n" $$(( \ diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S index c98e7e849f06..8550408735a0 100644 --- a/arch/arm64/crypto/sha1-ce-core.S +++ b/arch/arm64/crypto/sha1-ce-core.S @@ -82,7 +82,8 @@ ENTRY(sha1_ce_transform) ldr dgb, [x0, #16] /* load sha1_ce_state::finalize */ - ldr w4, [x0, #:lo12:sha1_ce_offsetof_finalize] + ldr_l w4, sha1_ce_offsetof_finalize, x4 + ldr w4, [x0, x4] /* load input */ 0: ld1 {v8.4s-v11.4s}, [x1], #64 @@ -132,7 +133,8 @@ CPU_LE( rev32 v11.16b, v11.16b ) * the padding is handled by the C code in that case. */ cbz x4, 3f - ldr x4, [x0, #:lo12:sha1_ce_offsetof_count] + ldr_l w4, sha1_ce_offsetof_count, x4 + ldr x4, [x0, x4] movi v9.2d, #0 mov x8, #0x80000000 movi v10.2d, #0 diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index aefda9868627..ea319c055f5d 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -17,9 +17,6 @@ #include #include -#define ASM_EXPORT(sym, val) \ - asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val)); - MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); @@ -32,6 +29,9 @@ struct sha1_ce_state { asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, int blocks); +const u32 sha1_ce_offsetof_count = offsetof(struct sha1_ce_state, sst.count); +const u32 sha1_ce_offsetof_finalize = offsetof(struct sha1_ce_state, finalize); + static int sha1_ce_update(struct shash_desc *desc, const u8 *data, unsigned int len) { @@ -52,11 +52,6 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, struct sha1_ce_state *sctx = shash_desc_ctx(desc); bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE); - ASM_EXPORT(sha1_ce_offsetof_count, - offsetof(struct sha1_ce_state, sst.count)); - ASM_EXPORT(sha1_ce_offsetof_finalize, - offsetof(struct sha1_ce_state, finalize)); - /* * Allow the asm code to perform the finalization if there is no * partial data and the input is a round multiple of the block size. diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S index 01cfee066837..679c6c002f4f 100644 --- a/arch/arm64/crypto/sha2-ce-core.S +++ b/arch/arm64/crypto/sha2-ce-core.S @@ -88,7 +88,8 @@ ENTRY(sha2_ce_transform) ld1 {dgav.4s, dgbv.4s}, [x0] /* load sha256_ce_state::finalize */ - ldr w4, [x0, #:lo12:sha256_ce_offsetof_finalize] + ldr_l w4, sha256_ce_offsetof_finalize, x4 + ldr w4, [x0, x4] /* load input */ 0: ld1 {v16.4s-v19.4s}, [x1], #64 @@ -136,7 +137,8 @@ CPU_LE( rev32 v19.16b, v19.16b ) * the padding is handled by the C code in that case. */ cbz x4, 3f - ldr x4, [x0, #:lo12:sha256_ce_offsetof_count] + ldr_l w4, sha256_ce_offsetof_count, x4 + ldr x4, [x0, x4] movi v17.2d, #0 mov x8, #0x80000000 movi v18.2d, #0 diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index 7cd587564a41..0ed9486f75dd 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -17,9 +17,6 @@ #include #include -#define ASM_EXPORT(sym, val) \ - asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val)); - MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); @@ -32,6 +29,11 @@ struct sha256_ce_state { asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src, int blocks); +const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state, + sst.count); +const u32 sha256_ce_offsetof_finalize = offsetof(struct sha256_ce_state, + finalize); + static int sha256_ce_update(struct shash_desc *desc, const u8 *data, unsigned int len) { @@ -52,11 +54,6 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data, struct sha256_ce_state *sctx = shash_desc_ctx(desc); bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE); - ASM_EXPORT(sha256_ce_offsetof_count, - offsetof(struct sha256_ce_state, sst.count)); - ASM_EXPORT(sha256_ce_offsetof_finalize, - offsetof(struct sha256_ce_state, finalize)); - /* * Allow the asm code to perform the finalization if there is no * partial data and the input is a round multiple of the block size. diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 3a405dccb6cf..7278ce729cd6 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -55,6 +55,9 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); #define alloc_screen_info(x...) &screen_info #define free_screen_info(x...) +/* redeclare as 'hidden' so the compiler will generate relative references */ +extern struct screen_info screen_info __attribute__((__visibility__("hidden"))); + static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) { } diff --git a/arch/arm64/include/asm/signal32.h b/arch/arm64/include/asm/signal32.h index eeaa97559bab..81abea0b7650 100644 --- a/arch/arm64/include/asm/signal32.h +++ b/arch/arm64/include/asm/signal32.h @@ -22,8 +22,6 @@ #define AARCH32_KERN_SIGRET_CODE_OFFSET 0x500 -extern const compat_ulong_t aarch32_sigret_code[6]; - int compat_setup_frame(int usig, struct ksignal *ksig, sigset_t *set, struct pt_regs *regs); int compat_setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index c7d26bb87a9a..1177adf49133 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -486,6 +486,7 @@ ENTRY(kimage_vaddr) * booted in EL1 or EL2 respectively. */ ENTRY(el2_setup) + msr SPsel, #1 // We want to use SP_EL{1,2} mrs x0, CurrentEL cmp x0, #CurrentEL_EL2 b.ne 1f diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index e33359f99d3f..bc463b5881b0 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -515,7 +515,7 @@ static const struct fault_info fault_info[] = { { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 0 translation fault" }, { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" }, { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" }, - { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, { do_bad, SIGBUS, 0, "unknown 8" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 3686d6abafde..9edda5466020 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -50,32 +50,10 @@ CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31 # The gate DSO image is built using a special linker script. include $(src)/Makefile.gate -# Calculate NR_IRQ = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, ...) based on config -define sed-y - "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}" -endef -quiet_cmd_nr_irqs = GEN $@ -define cmd_nr_irqs - (set -e; \ - echo "#ifndef __ASM_NR_IRQS_H__"; \ - echo "#define __ASM_NR_IRQS_H__"; \ - echo "/*"; \ - echo " * DO NOT MODIFY."; \ - echo " *"; \ - echo " * This file was generated by Kbuild"; \ - echo " *"; \ - echo " */"; \ - echo ""; \ - sed -ne $(sed-y) $<; \ - echo ""; \ - echo "#endif" ) > $@ -endef - # We use internal kbuild rules to avoid the "is up to date" message from make arch/$(SRCARCH)/kernel/nr-irqs.s: arch/$(SRCARCH)/kernel/nr-irqs.c $(Q)mkdir -p $(dir $@) $(call if_changed_dep,cc_s_c) -include/generated/nr-irqs.h: arch/$(SRCARCH)/kernel/nr-irqs.s - $(Q)mkdir -p $(dir $@) - $(call cmd,nr_irqs) +include/generated/nr-irqs.h: arch/$(SRCARCH)/kernel/nr-irqs.s FORCE + $(call filechk,offsets,__ASM_NR_IRQS_H__) diff --git a/arch/mips/ath79/clock.c b/arch/mips/ath79/clock.c index cc3a1e33a600..7e2bb12b64ea 100644 --- a/arch/mips/ath79/clock.c +++ b/arch/mips/ath79/clock.c @@ -508,16 +508,19 @@ static void __init ath79_clocks_init_dt_ng(struct device_node *np) ar9330_clk_init(ref_clk, pll_base); else { pr_err("%s: could not find any appropriate clk_init()\n", dnfn); - goto err_clk; + goto err_iounmap; } if (of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data)) { pr_err("%s: could not register clk provider\n", dnfn); - goto err_clk; + goto err_iounmap; } return; +err_iounmap: + iounmap(pll_base); + err_clk: clk_put(ref_clk); diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h index 956db6e201d1..c5d351786416 100644 --- a/arch/mips/include/asm/irq.h +++ b/arch/mips/include/asm/irq.h @@ -18,9 +18,24 @@ #include #define IRQ_STACK_SIZE THREAD_SIZE +#define IRQ_STACK_START (IRQ_STACK_SIZE - 16) extern void *irq_stack[NR_CPUS]; +/* + * The highest address on the IRQ stack contains a dummy frame put down in + * genex.S (handle_int & except_vec_vi_handler) which is structured as follows: + * + * top ------------ + * | task sp | <- irq_stack[cpu] + IRQ_STACK_START + * ------------ + * | | <- First frame of IRQ context + * ------------ + * + * task sp holds a copy of the task stack pointer where the struct pt_regs + * from exception entry can be found. + */ + static inline bool on_irq_stack(int cpu, unsigned long sp) { unsigned long low = (unsigned long)irq_stack[cpu]; diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index 4be2763f835d..bfff6ea45d51 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -103,6 +103,7 @@ void output_thread_info_defines(void) DEFINE(_THREAD_SIZE, THREAD_SIZE); DEFINE(_THREAD_MASK, THREAD_MASK); DEFINE(_IRQ_STACK_SIZE, IRQ_STACK_SIZE); + DEFINE(_IRQ_STACK_START, IRQ_STACK_START); BLANK(); } diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index 59476a607add..a00e87b0256d 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -361,7 +361,7 @@ LEAF(mips_cps_get_bootcfg) END(mips_cps_get_bootcfg) LEAF(mips_cps_boot_vpes) - PTR_L ta2, COREBOOTCFG_VPEMASK(a0) + lw ta2, COREBOOTCFG_VPEMASK(a0) PTR_L ta3, COREBOOTCFG_VPECONFIG(a0) #if defined(CONFIG_CPU_MIPSR6) diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index 2ac6c2625c13..ae810da4d499 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -215,9 +215,11 @@ NESTED(handle_int, PT_SIZE, sp) beq t0, t1, 2f /* Switch to IRQ stack */ - li t1, _IRQ_STACK_SIZE + li t1, _IRQ_STACK_START PTR_ADD sp, t0, t1 + /* Save task's sp on IRQ stack so that unwinding can follow it */ + LONG_S s1, 0(sp) 2: jal plat_irq_dispatch @@ -325,9 +327,11 @@ NESTED(except_vec_vi_handler, 0, sp) beq t0, t1, 2f /* Switch to IRQ stack */ - li t1, _IRQ_STACK_SIZE + li t1, _IRQ_STACK_START PTR_ADD sp, t0, t1 + /* Save task's sp on IRQ stack so that unwinding can follow it */ + LONG_S s1, 0(sp) 2: jalr v0 diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index fbbf5fcc695a..1b50958a1373 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -487,31 +487,52 @@ unsigned long notrace unwind_stack_by_address(unsigned long stack_page, unsigned long pc, unsigned long *ra) { + unsigned long low, high, irq_stack_high; struct mips_frame_info info; unsigned long size, ofs; + struct pt_regs *regs; int leaf; - extern void ret_from_irq(void); - extern void ret_from_exception(void); if (!stack_page) return 0; /* - * If we reached the bottom of interrupt context, - * return saved pc in pt_regs. + * IRQ stacks start at IRQ_STACK_START + * task stacks at THREAD_SIZE - 32 */ - if (pc == (unsigned long)ret_from_irq || - pc == (unsigned long)ret_from_exception) { - struct pt_regs *regs; - if (*sp >= stack_page && - *sp + sizeof(*regs) <= stack_page + THREAD_SIZE - 32) { - regs = (struct pt_regs *)*sp; - pc = regs->cp0_epc; - if (!user_mode(regs) && __kernel_text_address(pc)) { - *sp = regs->regs[29]; - *ra = regs->regs[31]; - return pc; - } + low = stack_page; + if (!preemptible() && on_irq_stack(raw_smp_processor_id(), *sp)) { + high = stack_page + IRQ_STACK_START; + irq_stack_high = high; + } else { + high = stack_page + THREAD_SIZE - 32; + irq_stack_high = 0; + } + + /* + * If we reached the top of the interrupt stack, start unwinding + * the interrupted task stack. + */ + if (unlikely(*sp == irq_stack_high)) { + unsigned long task_sp = *(unsigned long *)*sp; + + /* + * Check that the pointer saved in the IRQ stack head points to + * something within the stack of the current task + */ + if (!object_is_on_stack((void *)task_sp)) + return 0; + + /* + * Follow pointer to tasks kernel stack frame where interrupted + * state was saved. + */ + regs = (struct pt_regs *)task_sp; + pc = regs->cp0_epc; + if (!user_mode(regs) && __kernel_text_address(pc)) { + *sp = regs->regs[29]; + *ra = regs->regs[31]; + return pc; } return 0; } @@ -532,8 +553,7 @@ unsigned long notrace unwind_stack_by_address(unsigned long stack_page, if (leaf < 0) return 0; - if (*sp < stack_page || - *sp + info.frame_size > stack_page + THREAD_SIZE - 32) + if (*sp < low || *sp + info.frame_size > high) return 0; if (leaf) diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index d5de67591735..f0a0e6d62be3 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -182,7 +182,7 @@ SECTIONS * Force .bss to 64K alignment so that .bss..swapper_pg_dir * gets that alignment. .sbss should be empty, so there will be * no holes after __init_end. */ - BSS_SECTION(0, 0x10000, 0) + BSS_SECTION(0, 0x10000, 8) _end = . ; diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index 90565477dfbd..95bec460b651 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -469,8 +469,8 @@ void __init ltq_soc_init(void) panic("Failed to load xbar nodes from devicetree"); if (of_address_to_resource(np_xbar, 0, &res_xbar)) panic("Failed to get xbar resources"); - if (request_mem_region(res_xbar.start, resource_size(&res_xbar), - res_xbar.name) < 0) + if (!request_mem_region(res_xbar.start, resource_size(&res_xbar), + res_xbar.name)) panic("Failed to get xbar resources"); ltq_xbar_membase = ioremap_nocache(res_xbar.start, diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index e9385bcd9723..9ade60ca08e0 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -2386,7 +2386,6 @@ dcopuop: break; default: /* Reserved R6 ops */ - pr_err("Reserved MIPS R6 CMP.condn.S operation\n"); return SIGILL; } } @@ -2460,7 +2459,6 @@ dcopuop: break; default: /* Reserved R6 ops */ - pr_err("Reserved MIPS R6 CMP.condn.D operation\n"); return SIGILL; } } diff --git a/arch/mips/math-emu/dp_fmax.c b/arch/mips/math-emu/dp_fmax.c index fd71b8daaaf2..5bec64f2884e 100644 --- a/arch/mips/math-emu/dp_fmax.c +++ b/arch/mips/math-emu/dp_fmax.c @@ -47,14 +47,26 @@ union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): return ieee754dp_nanxcpt(x); - /* numbers are preferred to NaNs */ + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): return x; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): @@ -80,9 +92,7 @@ union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y) return ys ? x : y; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - if (xs == ys) - return x; - return ieee754dp_zero(1); + return ieee754dp_zero(xs & ys); case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; @@ -106,16 +116,32 @@ union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y) else if (xs < ys) return x; - /* Compare exponent */ - if (xe > ye) - return x; - else if (xe < ye) - return y; + /* Signs of inputs are equal, let's compare exponents */ + if (xs == 0) { + /* Inputs are both positive */ + if (xe > ye) + return x; + else if (xe < ye) + return y; + } else { + /* Inputs are both negative */ + if (xe > ye) + return y; + else if (xe < ye) + return x; + } - /* Compare mantissa */ + /* Signs and exponents of inputs are equal, let's compare mantissas */ + if (xs == 0) { + /* Inputs are both positive, with equal signs and exponents */ + if (xm <= ym) + return y; + return x; + } + /* Inputs are both negative, with equal signs and exponents */ if (xm <= ym) - return y; - return x; + return x; + return y; } union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y) @@ -147,14 +173,26 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): return ieee754dp_nanxcpt(x); - /* numbers are preferred to NaNs */ + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): return x; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): @@ -164,6 +202,9 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y) /* * Infinity and zero handling */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + return ieee754dp_inf(xs & ys); + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): @@ -171,7 +212,6 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): return x; - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): @@ -180,9 +220,7 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y) return y; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - if (xs == ys) - return x; - return ieee754dp_zero(1); + return ieee754dp_zero(xs & ys); case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; @@ -207,7 +245,11 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y) return y; /* Compare mantissa */ - if (xm <= ym) + if (xm < ym) return y; - return x; + else if (xm > ym) + return x; + else if (xs == 0) + return x; + return y; } diff --git a/arch/mips/math-emu/dp_fmin.c b/arch/mips/math-emu/dp_fmin.c index c1072b0dfb95..a287b23818d8 100644 --- a/arch/mips/math-emu/dp_fmin.c +++ b/arch/mips/math-emu/dp_fmin.c @@ -47,14 +47,26 @@ union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): return ieee754dp_nanxcpt(x); - /* numbers are preferred to NaNs */ + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): return x; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): @@ -80,9 +92,7 @@ union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y) return ys ? y : x; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - if (xs == ys) - return x; - return ieee754dp_zero(1); + return ieee754dp_zero(xs | ys); case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; @@ -106,16 +116,32 @@ union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y) else if (xs < ys) return y; - /* Compare exponent */ - if (xe > ye) - return y; - else if (xe < ye) - return x; + /* Signs of inputs are the same, let's compare exponents */ + if (xs == 0) { + /* Inputs are both positive */ + if (xe > ye) + return y; + else if (xe < ye) + return x; + } else { + /* Inputs are both negative */ + if (xe > ye) + return x; + else if (xe < ye) + return y; + } - /* Compare mantissa */ + /* Signs and exponents of inputs are equal, let's compare mantissas */ + if (xs == 0) { + /* Inputs are both positive, with equal signs and exponents */ + if (xm <= ym) + return x; + return y; + } + /* Inputs are both negative, with equal signs and exponents */ if (xm <= ym) - return x; - return y; + return y; + return x; } union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y) @@ -147,14 +173,26 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): return ieee754dp_nanxcpt(x); - /* numbers are preferred to NaNs */ + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): return x; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): @@ -164,25 +202,25 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y) /* * Infinity and zero handling */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + return ieee754dp_inf(xs | ys); + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): - return x; + return y; - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): - return y; + return x; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - if (xs == ys) - return x; - return ieee754dp_zero(1); + return ieee754dp_zero(xs | ys); case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; @@ -207,7 +245,11 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y) return x; /* Compare mantissa */ - if (xm <= ym) + if (xm < ym) + return x; + else if (xm > ym) + return y; + else if (xs == 1) return x; return y; } diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c index 4a2d03c72959..e0d9be5fbf4c 100644 --- a/arch/mips/math-emu/dp_maddf.c +++ b/arch/mips/math-emu/dp_maddf.c @@ -14,22 +14,45 @@ #include "ieee754dp.h" -enum maddf_flags { - maddf_negate_product = 1 << 0, -}; + +/* 128 bits shift right logical with rounding. */ +void srl128(u64 *hptr, u64 *lptr, int count) +{ + u64 low; + + if (count >= 128) { + *lptr = *hptr != 0 || *lptr != 0; + *hptr = 0; + } else if (count >= 64) { + if (count == 64) { + *lptr = *hptr | (*lptr != 0); + } else { + low = *lptr; + *lptr = *hptr >> (count - 64); + *lptr |= (*hptr << (128 - count)) != 0 || low != 0; + } + *hptr = 0; + } else { + low = *lptr; + *lptr = low >> count | *hptr << (64 - count); + *lptr |= (low << (64 - count)) != 0; + *hptr = *hptr >> count; + } +} static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, union ieee754dp y, enum maddf_flags flags) { int re; int rs; - u64 rm; unsigned lxm; unsigned hxm; unsigned lym; unsigned hym; u64 lrm; u64 hrm; + u64 lzm; + u64 hzm; u64 t; u64 at; int s; @@ -48,52 +71,34 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, ieee754_clearcx(); - switch (zc) { - case IEEE754_CLASS_SNAN: - ieee754_setcx(IEEE754_INVALID_OPERATION); + /* + * Handle the cases when at least one of x, y or z is a NaN. + * Order of precedence is sNaN, qNaN and z, x, y. + */ + if (zc == IEEE754_CLASS_SNAN) return ieee754dp_nanxcpt(z); - case IEEE754_CLASS_DNORM: - DPDNORMZ; - /* QNAN is handled separately below */ - } - - switch (CLPAIR(xc, yc)) { - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): - return ieee754dp_nanxcpt(y); - - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + if (xc == IEEE754_CLASS_SNAN) return ieee754dp_nanxcpt(x); - - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + if (yc == IEEE754_CLASS_SNAN) + return ieee754dp_nanxcpt(y); + if (zc == IEEE754_CLASS_QNAN) + return z; + if (xc == IEEE754_CLASS_QNAN) + return x; + if (yc == IEEE754_CLASS_QNAN) return y; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): - return x; + if (zc == IEEE754_CLASS_DNORM) + DPDNORMZ; + /* ZERO z cases are handled separately below */ + switch (CLPAIR(xc, yc)) { /* * Infinity handling */ case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): - if (zc == IEEE754_CLASS_QNAN) - return z; ieee754_setcx(IEEE754_INVALID_OPERATION); return ieee754dp_indef(); @@ -102,9 +107,27 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): - if (zc == IEEE754_CLASS_QNAN) - return z; - return ieee754dp_inf(xs ^ ys); + if ((zc == IEEE754_CLASS_INF) && + ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) || + ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) { + /* + * Cases of addition of infinities with opposite signs + * or subtraction of infinities with same signs. + */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754dp_indef(); + } + /* + * z is here either not an infinity, or an infinity having the + * same sign as product (x*y) (in case of MADDF.D instruction) + * or product -(x*y) (in MSUBF.D case). The result must be an + * infinity, and its sign is determined only by the value of + * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y. + */ + if (flags & MADDF_NEGATE_PRODUCT) + return ieee754dp_inf(1 ^ (xs ^ ys)); + else + return ieee754dp_inf(xs ^ ys); case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): @@ -113,32 +136,42 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): if (zc == IEEE754_CLASS_INF) return ieee754dp_inf(zs); - /* Multiplication is 0 so just return z */ + if (zc == IEEE754_CLASS_ZERO) { + /* Handle cases +0 + (-0) and similar ones. */ + if ((!(flags & MADDF_NEGATE_PRODUCT) + && (zs == (xs ^ ys))) || + ((flags & MADDF_NEGATE_PRODUCT) + && (zs != (xs ^ ys)))) + /* + * Cases of addition of zeros of equal signs + * or subtraction of zeroes of opposite signs. + * The sign of the resulting zero is in any + * such case determined only by the sign of z. + */ + return z; + + return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); + } + /* x*y is here 0, and z is not 0, so just return z */ return z; case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) + if (zc == IEEE754_CLASS_INF) return ieee754dp_inf(zs); DPDNORMY; break; case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) + if (zc == IEEE754_CLASS_INF) return ieee754dp_inf(zs); DPDNORMX; break; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) + if (zc == IEEE754_CLASS_INF) return ieee754dp_inf(zs); /* fall through to real computations */ } @@ -157,7 +190,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, re = xe + ye; rs = xs ^ ys; - if (flags & maddf_negate_product) + if (flags & MADDF_NEGATE_PRODUCT) rs ^= 1; /* shunt to top of word */ @@ -165,7 +198,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, ym <<= 64 - (DP_FBITS + 1); /* - * Multiply 64 bits xm, ym to give high 64 bits rm with stickness. + * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm. */ /* 32 * 32 => 64 */ @@ -195,78 +228,110 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, hrm = hrm + (t >> 32); - rm = hrm | (lrm != 0); - - /* - * Sticky shift down to normal rounding precision. - */ - if ((s64) rm < 0) { - rm = (rm >> (64 - (DP_FBITS + 1 + 3))) | - ((rm << (DP_FBITS + 1 + 3)) != 0); + /* Put explicit bit at bit 126 if necessary */ + if ((int64_t)hrm < 0) { + lrm = (hrm << 63) | (lrm >> 1); + hrm = hrm >> 1; re++; - } else { - rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) | - ((rm << (DP_FBITS + 1 + 3 + 1)) != 0); } - assert(rm & (DP_HIDDEN_BIT << 3)); - /* And now the addition */ - assert(zm & DP_HIDDEN_BIT); + assert(hrm & (1 << 62)); - /* - * Provide guard,round and stick bit space. - */ - zm <<= 3; + if (zc == IEEE754_CLASS_ZERO) { + /* + * Move explicit bit from bit 126 to bit 55 since the + * ieee754dp_format code expects the mantissa to be + * 56 bits wide (53 + 3 rounding bits). + */ + srl128(&hrm, &lrm, (126 - 55)); + return ieee754dp_format(rs, re, lrm); + } + /* Move explicit bit from bit 52 to bit 126 */ + lzm = 0; + hzm = zm << 10; + assert(hzm & (1 << 62)); + + /* Make the exponents the same */ if (ze > re) { /* * Have to shift y fraction right to align. */ s = ze - re; - rm = XDPSRS(rm, s); + srl128(&hrm, &lrm, s); re += s; } else if (re > ze) { /* * Have to shift x fraction right to align. */ s = re - ze; - zm = XDPSRS(zm, s); + srl128(&hzm, &lzm, s); ze += s; } assert(ze == re); assert(ze <= DP_EMAX); + /* Do the addition */ if (zs == rs) { /* - * Generate 28 bit result of adding two 27 bit numbers - * leaving result in xm, xs and xe. + * Generate 128 bit result by adding two 127 bit numbers + * leaving result in hzm:lzm, zs and ze. */ - zm = zm + rm; - - if (zm >> (DP_FBITS + 1 + 3)) { /* carry out */ - zm = XDPSRS1(zm); + hzm = hzm + hrm + (lzm > (lzm + lrm)); + lzm = lzm + lrm; + if ((int64_t)hzm < 0) { /* carry out */ + srl128(&hzm, &lzm, 1); ze++; } } else { - if (zm >= rm) { - zm = zm - rm; + if (hzm > hrm || (hzm == hrm && lzm >= lrm)) { + hzm = hzm - hrm - (lzm < lrm); + lzm = lzm - lrm; } else { - zm = rm - zm; + hzm = hrm - hzm - (lrm < lzm); + lzm = lrm - lzm; zs = rs; } - if (zm == 0) + if (lzm == 0 && hzm == 0) return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); /* - * Normalize to rounding precision. + * Put explicit bit at bit 126 if necessary. */ - while ((zm >> (DP_FBITS + 3)) == 0) { - zm <<= 1; - ze--; + if (hzm == 0) { + /* left shift by 63 or 64 bits */ + if ((int64_t)lzm < 0) { + /* MSB of lzm is the explicit bit */ + hzm = lzm >> 1; + lzm = lzm << 63; + ze -= 63; + } else { + hzm = lzm; + lzm = 0; + ze -= 64; + } + } + + t = 0; + while ((hzm >> (62 - t)) == 0) + t++; + + assert(t <= 62); + if (t) { + hzm = hzm << t | lzm >> (64 - t); + lzm = lzm << t; + ze -= t; } } - return ieee754dp_format(zs, ze, zm); + /* + * Move explicit bit from bit 126 to bit 55 since the + * ieee754dp_format code expects the mantissa to be + * 56 bits wide (53 + 3 rounding bits). + */ + srl128(&hzm, &lzm, (126 - 55)); + + return ieee754dp_format(zs, ze, lzm); } union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, @@ -278,5 +343,5 @@ union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x, union ieee754dp y) { - return _dp_maddf(z, x, y, maddf_negate_product); + return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); } diff --git a/arch/mips/math-emu/ieee754int.h b/arch/mips/math-emu/ieee754int.h index 8bc2f6963324..dd2071f430e0 100644 --- a/arch/mips/math-emu/ieee754int.h +++ b/arch/mips/math-emu/ieee754int.h @@ -26,6 +26,10 @@ #define CLPAIR(x, y) ((x)*6+(y)) +enum maddf_flags { + MADDF_NEGATE_PRODUCT = 1 << 0, +}; + static inline void ieee754_clearcx(void) { ieee754_csr.cx = 0; diff --git a/arch/mips/math-emu/ieee754sp.h b/arch/mips/math-emu/ieee754sp.h index 8476067075fe..0f63e4202cff 100644 --- a/arch/mips/math-emu/ieee754sp.h +++ b/arch/mips/math-emu/ieee754sp.h @@ -45,6 +45,10 @@ static inline int ieee754sp_finite(union ieee754sp x) return SPBEXP(x) != SP_EMAX + 1 + SP_EBIAS; } +/* 64 bit right shift with rounding */ +#define XSPSRS64(v, rs) \ + (((rs) >= 64) ? ((v) != 0) : ((v) >> (rs)) | ((v) << (64-(rs)) != 0)) + /* 3bit extended single precision sticky right shift */ #define XSPSRS(v, rs) \ ((rs > (SP_FBITS+3))?1:((v) >> (rs)) | ((v) << (32-(rs)) != 0)) diff --git a/arch/mips/math-emu/sp_fmax.c b/arch/mips/math-emu/sp_fmax.c index 4d000844e48e..74a5a00d2f22 100644 --- a/arch/mips/math-emu/sp_fmax.c +++ b/arch/mips/math-emu/sp_fmax.c @@ -47,14 +47,26 @@ union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): return ieee754sp_nanxcpt(x); - /* numbers are preferred to NaNs */ + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): return x; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): @@ -80,9 +92,7 @@ union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y) return ys ? x : y; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - if (xs == ys) - return x; - return ieee754sp_zero(1); + return ieee754sp_zero(xs & ys); case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; @@ -106,16 +116,32 @@ union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y) else if (xs < ys) return x; - /* Compare exponent */ - if (xe > ye) - return x; - else if (xe < ye) - return y; + /* Signs of inputs are equal, let's compare exponents */ + if (xs == 0) { + /* Inputs are both positive */ + if (xe > ye) + return x; + else if (xe < ye) + return y; + } else { + /* Inputs are both negative */ + if (xe > ye) + return y; + else if (xe < ye) + return x; + } - /* Compare mantissa */ + /* Signs and exponents of inputs are equal, let's compare mantissas */ + if (xs == 0) { + /* Inputs are both positive, with equal signs and exponents */ + if (xm <= ym) + return y; + return x; + } + /* Inputs are both negative, with equal signs and exponents */ if (xm <= ym) - return y; - return x; + return x; + return y; } union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y) @@ -147,14 +173,26 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): return ieee754sp_nanxcpt(x); - /* numbers are preferred to NaNs */ + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): return x; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): @@ -164,6 +202,9 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y) /* * Infinity and zero handling */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + return ieee754sp_inf(xs & ys); + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): @@ -171,7 +212,6 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): return x; - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): @@ -180,9 +220,7 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y) return y; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - if (xs == ys) - return x; - return ieee754sp_zero(1); + return ieee754sp_zero(xs & ys); case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; @@ -207,7 +245,11 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y) return y; /* Compare mantissa */ - if (xm <= ym) + if (xm < ym) return y; - return x; + else if (xm > ym) + return x; + else if (xs == 0) + return x; + return y; } diff --git a/arch/mips/math-emu/sp_fmin.c b/arch/mips/math-emu/sp_fmin.c index 4eb1bb9e9dec..c51385f46b09 100644 --- a/arch/mips/math-emu/sp_fmin.c +++ b/arch/mips/math-emu/sp_fmin.c @@ -47,14 +47,26 @@ union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): return ieee754sp_nanxcpt(x); - /* numbers are preferred to NaNs */ + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): return x; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): @@ -80,9 +92,7 @@ union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y) return ys ? y : x; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - if (xs == ys) - return x; - return ieee754sp_zero(1); + return ieee754sp_zero(xs | ys); case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; @@ -106,16 +116,32 @@ union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y) else if (xs < ys) return y; - /* Compare exponent */ - if (xe > ye) - return y; - else if (xe < ye) - return x; + /* Signs of inputs are the same, let's compare exponents */ + if (xs == 0) { + /* Inputs are both positive */ + if (xe > ye) + return y; + else if (xe < ye) + return x; + } else { + /* Inputs are both negative */ + if (xe > ye) + return x; + else if (xe < ye) + return y; + } - /* Compare mantissa */ + /* Signs and exponents of inputs are equal, let's compare mantissas */ + if (xs == 0) { + /* Inputs are both positive, with equal signs and exponents */ + if (xm <= ym) + return x; + return y; + } + /* Inputs are both negative, with equal signs and exponents */ if (xm <= ym) - return x; - return y; + return y; + return x; } union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y) @@ -147,14 +173,26 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): return ieee754sp_nanxcpt(x); - /* numbers are preferred to NaNs */ + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): return x; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): @@ -164,25 +202,25 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y) /* * Infinity and zero handling */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + return ieee754sp_inf(xs | ys); + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): - return x; + return y; - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): - return y; + return x; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - if (xs == ys) - return x; - return ieee754sp_zero(1); + return ieee754sp_zero(xs | ys); case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; @@ -207,7 +245,11 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y) return x; /* Compare mantissa */ - if (xm <= ym) + if (xm < ym) + return x; + else if (xm > ym) + return y; + else if (xs == 1) return x; return y; } diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c index a8cd8b4f235e..7195fe785d81 100644 --- a/arch/mips/math-emu/sp_maddf.c +++ b/arch/mips/math-emu/sp_maddf.c @@ -14,9 +14,6 @@ #include "ieee754sp.h" -enum maddf_flags { - maddf_negate_product = 1 << 0, -}; static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, union ieee754sp y, enum maddf_flags flags) @@ -24,14 +21,8 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, int re; int rs; unsigned rm; - unsigned short lxm; - unsigned short hxm; - unsigned short lym; - unsigned short hym; - unsigned lrm; - unsigned hrm; - unsigned t; - unsigned at; + uint64_t rm64; + uint64_t zm64; int s; COMPXSP; @@ -48,51 +39,35 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, ieee754_clearcx(); - switch (zc) { - case IEEE754_CLASS_SNAN: - ieee754_setcx(IEEE754_INVALID_OPERATION); + /* + * Handle the cases when at least one of x, y or z is a NaN. + * Order of precedence is sNaN, qNaN and z, x, y. + */ + if (zc == IEEE754_CLASS_SNAN) return ieee754sp_nanxcpt(z); - case IEEE754_CLASS_DNORM: - SPDNORMZ; - /* QNAN is handled separately below */ - } - - switch (CLPAIR(xc, yc)) { - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): - return ieee754sp_nanxcpt(y); - - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + if (xc == IEEE754_CLASS_SNAN) return ieee754sp_nanxcpt(x); - - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + if (yc == IEEE754_CLASS_SNAN) + return ieee754sp_nanxcpt(y); + if (zc == IEEE754_CLASS_QNAN) + return z; + if (xc == IEEE754_CLASS_QNAN) + return x; + if (yc == IEEE754_CLASS_QNAN) return y; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): - return x; + if (zc == IEEE754_CLASS_DNORM) + SPDNORMZ; + /* ZERO z cases are handled separately below */ + + switch (CLPAIR(xc, yc)) { + /* * Infinity handling */ case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): - if (zc == IEEE754_CLASS_QNAN) - return z; ieee754_setcx(IEEE754_INVALID_OPERATION); return ieee754sp_indef(); @@ -101,9 +76,27 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): - if (zc == IEEE754_CLASS_QNAN) - return z; - return ieee754sp_inf(xs ^ ys); + if ((zc == IEEE754_CLASS_INF) && + ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) || + ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) { + /* + * Cases of addition of infinities with opposite signs + * or subtraction of infinities with same signs. + */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754sp_indef(); + } + /* + * z is here either not an infinity, or an infinity having the + * same sign as product (x*y) (in case of MADDF.D instruction) + * or product -(x*y) (in MSUBF.D case). The result must be an + * infinity, and its sign is determined only by the value of + * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y. + */ + if (flags & MADDF_NEGATE_PRODUCT) + return ieee754sp_inf(1 ^ (xs ^ ys)); + else + return ieee754sp_inf(xs ^ ys); case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): @@ -112,32 +105,42 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): if (zc == IEEE754_CLASS_INF) return ieee754sp_inf(zs); - /* Multiplication is 0 so just return z */ + if (zc == IEEE754_CLASS_ZERO) { + /* Handle cases +0 + (-0) and similar ones. */ + if ((!(flags & MADDF_NEGATE_PRODUCT) + && (zs == (xs ^ ys))) || + ((flags & MADDF_NEGATE_PRODUCT) + && (zs != (xs ^ ys)))) + /* + * Cases of addition of zeros of equal signs + * or subtraction of zeroes of opposite signs. + * The sign of the resulting zero is in any + * such case determined only by the sign of z. + */ + return z; + + return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); + } + /* x*y is here 0, and z is not 0, so just return z */ return z; case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) + if (zc == IEEE754_CLASS_INF) return ieee754sp_inf(zs); SPDNORMY; break; case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) + if (zc == IEEE754_CLASS_INF) return ieee754sp_inf(zs); SPDNORMX; break; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) + if (zc == IEEE754_CLASS_INF) return ieee754sp_inf(zs); /* fall through to real computations */ } @@ -158,108 +161,93 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, re = xe + ye; rs = xs ^ ys; - if (flags & maddf_negate_product) + if (flags & MADDF_NEGATE_PRODUCT) rs ^= 1; - /* shunt to top of word */ - xm <<= 32 - (SP_FBITS + 1); - ym <<= 32 - (SP_FBITS + 1); + /* Multiple 24 bit xm and ym to give 48 bit results */ + rm64 = (uint64_t)xm * ym; - /* - * Multiply 32 bits xm, ym to give high 32 bits rm with stickness. - */ - lxm = xm & 0xffff; - hxm = xm >> 16; - lym = ym & 0xffff; - hym = ym >> 16; + /* Shunt to top of word */ + rm64 = rm64 << 16; - lrm = lxm * lym; /* 16 * 16 => 32 */ - hrm = hxm * hym; /* 16 * 16 => 32 */ - - t = lxm * hym; /* 16 * 16 => 32 */ - at = lrm + (t << 16); - hrm += at < lrm; - lrm = at; - hrm = hrm + (t >> 16); - - t = hxm * lym; /* 16 * 16 => 32 */ - at = lrm + (t << 16); - hrm += at < lrm; - lrm = at; - hrm = hrm + (t >> 16); - - rm = hrm | (lrm != 0); - - /* - * Sticky shift down to normal rounding precision. - */ - if ((int) rm < 0) { - rm = (rm >> (32 - (SP_FBITS + 1 + 3))) | - ((rm << (SP_FBITS + 1 + 3)) != 0); + /* Put explicit bit at bit 62 if necessary */ + if ((int64_t) rm64 < 0) { + rm64 = rm64 >> 1; re++; - } else { - rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) | - ((rm << (SP_FBITS + 1 + 3 + 1)) != 0); } - assert(rm & (SP_HIDDEN_BIT << 3)); - /* And now the addition */ + assert(rm64 & (1 << 62)); - assert(zm & SP_HIDDEN_BIT); + if (zc == IEEE754_CLASS_ZERO) { + /* + * Move explicit bit from bit 62 to bit 26 since the + * ieee754sp_format code expects the mantissa to be + * 27 bits wide (24 + 3 rounding bits). + */ + rm = XSPSRS64(rm64, (62 - 26)); + return ieee754sp_format(rs, re, rm); + } - /* - * Provide guard,round and stick bit space. - */ - zm <<= 3; + /* Move explicit bit from bit 23 to bit 62 */ + zm64 = (uint64_t)zm << (62 - 23); + assert(zm64 & (1 << 62)); + /* Make the exponents the same */ if (ze > re) { /* * Have to shift r fraction right to align. */ s = ze - re; - rm = XSPSRS(rm, s); + rm64 = XSPSRS64(rm64, s); re += s; } else if (re > ze) { /* * Have to shift z fraction right to align. */ s = re - ze; - zm = XSPSRS(zm, s); + zm64 = XSPSRS64(zm64, s); ze += s; } assert(ze == re); assert(ze <= SP_EMAX); + /* Do the addition */ if (zs == rs) { /* - * Generate 28 bit result of adding two 27 bit numbers - * leaving result in zm, zs and ze. + * Generate 64 bit result by adding two 63 bit numbers + * leaving result in zm64, zs and ze. */ - zm = zm + rm; - - if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */ - zm = XSPSRS1(zm); + zm64 = zm64 + rm64; + if ((int64_t)zm64 < 0) { /* carry out */ + zm64 = XSPSRS1(zm64); ze++; } } else { - if (zm >= rm) { - zm = zm - rm; + if (zm64 >= rm64) { + zm64 = zm64 - rm64; } else { - zm = rm - zm; + zm64 = rm64 - zm64; zs = rs; } - if (zm == 0) + if (zm64 == 0) return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); /* - * Normalize in extended single precision + * Put explicit bit at bit 62 if necessary. */ - while ((zm >> (SP_MBITS + 3)) == 0) { - zm <<= 1; + while ((zm64 >> 62) == 0) { + zm64 <<= 1; ze--; } - } + + /* + * Move explicit bit from bit 62 to bit 26 since the + * ieee754sp_format code expects the mantissa to be + * 27 bits wide (24 + 3 rounding bits). + */ + zm = XSPSRS64(zm64, (62 - 26)); + return ieee754sp_format(zs, ze, zm); } @@ -272,5 +260,5 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, union ieee754sp y) { - return _sp_maddf(z, x, y, maddf_negate_product); + return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); } diff --git a/arch/mips/ralink/mt7620.c b/arch/mips/ralink/mt7620.c index 3c7c9bf57bf3..6f892c1f3ad7 100644 --- a/arch/mips/ralink/mt7620.c +++ b/arch/mips/ralink/mt7620.c @@ -176,7 +176,7 @@ static struct rt2880_pmx_func spi_cs1_grp_mt7628[] = { static struct rt2880_pmx_func spis_grp_mt7628[] = { FUNC("pwm_uart2", 3, 14, 4), - FUNC("util", 2, 14, 4), + FUNC("utif", 2, 14, 4), FUNC("gpio", 1, 14, 4), FUNC("spis", 0, 14, 4), }; @@ -190,28 +190,28 @@ static struct rt2880_pmx_func gpio_grp_mt7628[] = { static struct rt2880_pmx_func p4led_kn_grp_mt7628[] = { FUNC("jtag", 3, 30, 1), - FUNC("util", 2, 30, 1), + FUNC("utif", 2, 30, 1), FUNC("gpio", 1, 30, 1), FUNC("p4led_kn", 0, 30, 1), }; static struct rt2880_pmx_func p3led_kn_grp_mt7628[] = { FUNC("jtag", 3, 31, 1), - FUNC("util", 2, 31, 1), + FUNC("utif", 2, 31, 1), FUNC("gpio", 1, 31, 1), FUNC("p3led_kn", 0, 31, 1), }; static struct rt2880_pmx_func p2led_kn_grp_mt7628[] = { FUNC("jtag", 3, 32, 1), - FUNC("util", 2, 32, 1), + FUNC("utif", 2, 32, 1), FUNC("gpio", 1, 32, 1), FUNC("p2led_kn", 0, 32, 1), }; static struct rt2880_pmx_func p1led_kn_grp_mt7628[] = { FUNC("jtag", 3, 33, 1), - FUNC("util", 2, 33, 1), + FUNC("utif", 2, 33, 1), FUNC("gpio", 1, 33, 1), FUNC("p1led_kn", 0, 33, 1), }; @@ -232,28 +232,28 @@ static struct rt2880_pmx_func wled_kn_grp_mt7628[] = { static struct rt2880_pmx_func p4led_an_grp_mt7628[] = { FUNC("jtag", 3, 39, 1), - FUNC("util", 2, 39, 1), + FUNC("utif", 2, 39, 1), FUNC("gpio", 1, 39, 1), FUNC("p4led_an", 0, 39, 1), }; static struct rt2880_pmx_func p3led_an_grp_mt7628[] = { FUNC("jtag", 3, 40, 1), - FUNC("util", 2, 40, 1), + FUNC("utif", 2, 40, 1), FUNC("gpio", 1, 40, 1), FUNC("p3led_an", 0, 40, 1), }; static struct rt2880_pmx_func p2led_an_grp_mt7628[] = { FUNC("jtag", 3, 41, 1), - FUNC("util", 2, 41, 1), + FUNC("utif", 2, 41, 1), FUNC("gpio", 1, 41, 1), FUNC("p2led_an", 0, 41, 1), }; static struct rt2880_pmx_func p1led_an_grp_mt7628[] = { FUNC("jtag", 3, 42, 1), - FUNC("util", 2, 42, 1), + FUNC("utif", 2, 42, 1), FUNC("gpio", 1, 42, 1), FUNC("p1led_an", 0, 42, 1), }; diff --git a/arch/mips/ralink/rt3883.c b/arch/mips/ralink/rt3883.c index 9e4631acfcb5..3e68e35daf21 100644 --- a/arch/mips/ralink/rt3883.c +++ b/arch/mips/ralink/rt3883.c @@ -145,5 +145,5 @@ void prom_soc_init(struct ralink_soc_info *soc_info) rt2880_pinmux_data = rt3883_pinmux_data; - ralink_soc == RT3883_SOC; + ralink_soc = RT3883_SOC; } diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c index 518f4f5f1f43..d63d42533133 100644 --- a/arch/parisc/kernel/perf.c +++ b/arch/parisc/kernel/perf.c @@ -39,7 +39,7 @@ * the PDC INTRIGUE calls. This is done to eliminate bugs introduced * in various PDC revisions. The code is much more maintainable * and reliable this way vs having to debug on every version of PDC - * on every box. + * on every box. */ #include @@ -195,8 +195,8 @@ static int perf_config(uint32_t *image_ptr); static int perf_release(struct inode *inode, struct file *file); static int perf_open(struct inode *inode, struct file *file); static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos); -static ssize_t perf_write(struct file *file, const char __user *buf, size_t count, - loff_t *ppos); +static ssize_t perf_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos); static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg); static void perf_start_counters(void); static int perf_stop_counters(uint32_t *raddr); @@ -222,7 +222,7 @@ extern void perf_intrigue_disable_perf_counters (void); /* * configure: * - * Configure the cpu with a given data image. First turn off the counters, + * Configure the cpu with a given data image. First turn off the counters, * then download the image, then turn the counters back on. */ static int perf_config(uint32_t *image_ptr) @@ -234,7 +234,7 @@ static int perf_config(uint32_t *image_ptr) error = perf_stop_counters(raddr); if (error != 0) { printk("perf_config: perf_stop_counters = %ld\n", error); - return -EINVAL; + return -EINVAL; } printk("Preparing to write image\n"); @@ -242,7 +242,7 @@ printk("Preparing to write image\n"); error = perf_write_image((uint64_t *)image_ptr); if (error != 0) { printk("perf_config: DOWNLOAD = %ld\n", error); - return -EINVAL; + return -EINVAL; } printk("Preparing to start counters\n"); @@ -254,7 +254,7 @@ printk("Preparing to start counters\n"); } /* - * Open the device and initialize all of its memory. The device is only + * Open the device and initialize all of its memory. The device is only * opened once, but can be "queried" by multiple processes that know its * file descriptor. */ @@ -298,8 +298,8 @@ static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t * called on the processor that the download should happen * on. */ -static ssize_t perf_write(struct file *file, const char __user *buf, size_t count, - loff_t *ppos) +static ssize_t perf_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) { int err; size_t image_size; @@ -307,11 +307,11 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun uint32_t interface_type; uint32_t test; - if (perf_processor_interface == ONYX_INTF) + if (perf_processor_interface == ONYX_INTF) image_size = PCXU_IMAGE_SIZE; - else if (perf_processor_interface == CUDA_INTF) + else if (perf_processor_interface == CUDA_INTF) image_size = PCXW_IMAGE_SIZE; - else + else return -EFAULT; if (!capable(CAP_SYS_ADMIN)) @@ -331,22 +331,22 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun /* First check the machine type is correct for the requested image */ - if (((perf_processor_interface == CUDA_INTF) && - (interface_type != CUDA_INTF)) || - ((perf_processor_interface == ONYX_INTF) && - (interface_type != ONYX_INTF))) + if (((perf_processor_interface == CUDA_INTF) && + (interface_type != CUDA_INTF)) || + ((perf_processor_interface == ONYX_INTF) && + (interface_type != ONYX_INTF))) return -EINVAL; /* Next check to make sure the requested image is valid */ - if (((interface_type == CUDA_INTF) && + if (((interface_type == CUDA_INTF) && (test >= MAX_CUDA_IMAGES)) || - ((interface_type == ONYX_INTF) && - (test >= MAX_ONYX_IMAGES))) + ((interface_type == ONYX_INTF) && + (test >= MAX_ONYX_IMAGES))) return -EINVAL; /* Copy the image into the processor */ - if (interface_type == CUDA_INTF) + if (interface_type == CUDA_INTF) return perf_config(cuda_images[test]); else return perf_config(onyx_images[test]); @@ -360,7 +360,7 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun static void perf_patch_images(void) { #if 0 /* FIXME!! */ -/* +/* * NOTE: this routine is VERY specific to the current TLB image. * If the image is changed, this routine might also need to be changed. */ @@ -368,9 +368,9 @@ static void perf_patch_images(void) extern void $i_dtlb_miss_2_0(); extern void PA2_0_iva(); - /* + /* * We can only use the lower 32-bits, the upper 32-bits should be 0 - * anyway given this is in the kernel + * anyway given this is in the kernel */ uint32_t itlb_addr = (uint32_t)&($i_itlb_miss_2_0); uint32_t dtlb_addr = (uint32_t)&($i_dtlb_miss_2_0); @@ -378,21 +378,21 @@ static void perf_patch_images(void) if (perf_processor_interface == ONYX_INTF) { /* clear last 2 bytes */ - onyx_images[TLBMISS][15] &= 0xffffff00; + onyx_images[TLBMISS][15] &= 0xffffff00; /* set 2 bytes */ onyx_images[TLBMISS][15] |= (0x000000ff&((dtlb_addr) >> 24)); onyx_images[TLBMISS][16] = (dtlb_addr << 8)&0xffffff00; onyx_images[TLBMISS][17] = itlb_addr; /* clear last 2 bytes */ - onyx_images[TLBHANDMISS][15] &= 0xffffff00; + onyx_images[TLBHANDMISS][15] &= 0xffffff00; /* set 2 bytes */ onyx_images[TLBHANDMISS][15] |= (0x000000ff&((dtlb_addr) >> 24)); onyx_images[TLBHANDMISS][16] = (dtlb_addr << 8)&0xffffff00; onyx_images[TLBHANDMISS][17] = itlb_addr; /* clear last 2 bytes */ - onyx_images[BIG_CPI][15] &= 0xffffff00; + onyx_images[BIG_CPI][15] &= 0xffffff00; /* set 2 bytes */ onyx_images[BIG_CPI][15] |= (0x000000ff&((dtlb_addr) >> 24)); onyx_images[BIG_CPI][16] = (dtlb_addr << 8)&0xffffff00; @@ -405,24 +405,24 @@ static void perf_patch_images(void) } else if (perf_processor_interface == CUDA_INTF) { /* Cuda interface */ - cuda_images[TLBMISS][16] = + cuda_images[TLBMISS][16] = (cuda_images[TLBMISS][16]&0xffff0000) | ((dtlb_addr >> 8)&0x0000ffff); - cuda_images[TLBMISS][17] = + cuda_images[TLBMISS][17] = ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); cuda_images[TLBMISS][18] = (itlb_addr << 16)&0xffff0000; - cuda_images[TLBHANDMISS][16] = + cuda_images[TLBHANDMISS][16] = (cuda_images[TLBHANDMISS][16]&0xffff0000) | ((dtlb_addr >> 8)&0x0000ffff); - cuda_images[TLBHANDMISS][17] = + cuda_images[TLBHANDMISS][17] = ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); cuda_images[TLBHANDMISS][18] = (itlb_addr << 16)&0xffff0000; - cuda_images[BIG_CPI][16] = + cuda_images[BIG_CPI][16] = (cuda_images[BIG_CPI][16]&0xffff0000) | ((dtlb_addr >> 8)&0x0000ffff); - cuda_images[BIG_CPI][17] = + cuda_images[BIG_CPI][17] = ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); cuda_images[BIG_CPI][18] = (itlb_addr << 16)&0xffff0000; } else { @@ -434,7 +434,7 @@ static void perf_patch_images(void) /* * ioctl routine - * All routines effect the processor that they are executed on. Thus you + * All routines effect the processor that they are executed on. Thus you * must be running on the processor that you wish to change. */ @@ -460,7 +460,7 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } /* copy out the Counters */ - if (copy_to_user((void __user *)arg, raddr, + if (copy_to_user((void __user *)arg, raddr, sizeof (raddr)) != 0) { error = -EFAULT; break; @@ -488,7 +488,7 @@ static const struct file_operations perf_fops = { .open = perf_open, .release = perf_release }; - + static struct miscdevice perf_dev = { MISC_DYNAMIC_MINOR, PA_PERF_DEV, @@ -596,7 +596,7 @@ static int perf_stop_counters(uint32_t *raddr) /* OR sticky2 (bit 1496) to counter2 bit 32 */ tmp64 |= (userbuf[23] >> 8) & 0x0000000080000000; raddr[2] = (uint32_t)tmp64; - + /* Counter3 is bits 1497 to 1528 */ tmp64 = (userbuf[23] >> 7) & 0x00000000ffffffff; /* OR sticky3 (bit 1529) to counter3 bit 32 */ @@ -618,7 +618,7 @@ static int perf_stop_counters(uint32_t *raddr) userbuf[22] = 0; userbuf[23] = 0; - /* + /* * Write back the zeroed bytes + the image given * the read was destructive. */ @@ -626,13 +626,13 @@ static int perf_stop_counters(uint32_t *raddr) } else { /* - * Read RDR-15 which contains the counters and sticky bits + * Read RDR-15 which contains the counters and sticky bits */ if (!perf_rdr_read_ubuf(15, userbuf)) { return -13; } - /* + /* * Clear out the counters */ perf_rdr_clear(15); @@ -645,7 +645,7 @@ static int perf_stop_counters(uint32_t *raddr) raddr[2] = (uint32_t)((userbuf[1] >> 32) & 0x00000000ffffffffUL); raddr[3] = (uint32_t)(userbuf[1] & 0x00000000ffffffffUL); } - + return 0; } @@ -683,7 +683,7 @@ static int perf_rdr_read_ubuf(uint32_t rdr_num, uint64_t *buffer) i = tentry->num_words; while (i--) { buffer[i] = 0; - } + } /* Check for bits an even number of 64 */ if ((xbits = width & 0x03f) != 0) { @@ -809,18 +809,22 @@ static int perf_write_image(uint64_t *memaddr) } runway = ioremap_nocache(cpu_device->hpa.start, 4096); + if (!runway) { + pr_err("perf_write_image: ioremap failed!\n"); + return -ENOMEM; + } /* Merge intrigue bits into Runway STATUS 0 */ tmp64 = __raw_readq(runway + RUNWAY_STATUS) & 0xffecfffffffffffful; - __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul), + __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul), runway + RUNWAY_STATUS); - + /* Write RUNWAY DEBUG registers */ for (i = 0; i < 8; i++) { __raw_writeq(*memaddr++, runway + RUNWAY_DEBUG); } - return 0; + return 0; } /* @@ -844,7 +848,7 @@ printk("perf_rdr_write\n"); perf_rdr_shift_out_U(rdr_num, buffer[i]); } else { perf_rdr_shift_out_W(rdr_num, buffer[i]); - } + } } printk("perf_rdr_write done\n"); } diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 23de307c3052..41e60a9c7db2 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -742,7 +742,7 @@ lws_compare_and_swap_2: 10: ldd 0(%r25), %r25 11: ldd 0(%r24), %r24 #else - /* Load new value into r22/r23 - high/low */ + /* Load old value into r22/r23 - high/low */ 10: ldw 0(%r25), %r22 11: ldw 4(%r25), %r23 /* Load new value into fr4 for atomic store later */ @@ -834,11 +834,11 @@ cas2_action: copy %r0, %r28 #else /* Compare first word */ -19: ldw,ma 0(%r26), %r29 +19: ldw 0(%r26), %r29 sub,= %r29, %r22, %r0 b,n cas2_end /* Compare second word */ -20: ldw,ma 4(%r26), %r29 +20: ldw 4(%r26), %r29 sub,= %r29, %r23, %r0 b,n cas2_end /* Perform the store */ diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index b2da7c8baed7..292458b694fb 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -235,6 +235,28 @@ static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr) #define SWIZ_PTR(p) ((unsigned char __user *)((p) ^ swiz)) +#define __get_user_or_set_dar(_regs, _dest, _addr) \ + ({ \ + int rc = 0; \ + typeof(_addr) __addr = (_addr); \ + if (__get_user_inatomic(_dest, __addr)) { \ + _regs->dar = (unsigned long)__addr; \ + rc = -EFAULT; \ + } \ + rc; \ + }) + +#define __put_user_or_set_dar(_regs, _src, _addr) \ + ({ \ + int rc = 0; \ + typeof(_addr) __addr = (_addr); \ + if (__put_user_inatomic(_src, __addr)) { \ + _regs->dar = (unsigned long)__addr; \ + rc = -EFAULT; \ + } \ + rc; \ + }) + static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr, unsigned int reg, unsigned int nb, unsigned int flags, unsigned int instr, @@ -263,9 +285,10 @@ static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr, } else { unsigned long pc = regs->nip ^ (swiz & 4); - if (__get_user_inatomic(instr, - (unsigned int __user *)pc)) + if (__get_user_or_set_dar(regs, instr, + (unsigned int __user *)pc)) return -EFAULT; + if (swiz == 0 && (flags & SW)) instr = cpu_to_le32(instr); nb = (instr >> 11) & 0x1f; @@ -309,31 +332,31 @@ static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr, ((nb0 + 3) / 4) * sizeof(unsigned long)); for (i = 0; i < nb; ++i, ++p) - if (__get_user_inatomic(REG_BYTE(rptr, i ^ bswiz), - SWIZ_PTR(p))) + if (__get_user_or_set_dar(regs, REG_BYTE(rptr, i ^ bswiz), + SWIZ_PTR(p))) return -EFAULT; if (nb0 > 0) { rptr = ®s->gpr[0]; addr += nb; for (i = 0; i < nb0; ++i, ++p) - if (__get_user_inatomic(REG_BYTE(rptr, - i ^ bswiz), - SWIZ_PTR(p))) + if (__get_user_or_set_dar(regs, + REG_BYTE(rptr, i ^ bswiz), + SWIZ_PTR(p))) return -EFAULT; } } else { for (i = 0; i < nb; ++i, ++p) - if (__put_user_inatomic(REG_BYTE(rptr, i ^ bswiz), - SWIZ_PTR(p))) + if (__put_user_or_set_dar(regs, REG_BYTE(rptr, i ^ bswiz), + SWIZ_PTR(p))) return -EFAULT; if (nb0 > 0) { rptr = ®s->gpr[0]; addr += nb; for (i = 0; i < nb0; ++i, ++p) - if (__put_user_inatomic(REG_BYTE(rptr, - i ^ bswiz), - SWIZ_PTR(p))) + if (__put_user_or_set_dar(regs, + REG_BYTE(rptr, i ^ bswiz), + SWIZ_PTR(p))) return -EFAULT; } } @@ -345,29 +368,32 @@ static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr, * Only POWER6 has these instructions, and it does true little-endian, * so we don't need the address swizzling. */ -static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg, - unsigned int flags) +static int emulate_fp_pair(struct pt_regs *regs, unsigned char __user *addr, + unsigned int reg, unsigned int flags) { char *ptr0 = (char *) ¤t->thread.TS_FPR(reg); char *ptr1 = (char *) ¤t->thread.TS_FPR(reg+1); - int i, ret, sw = 0; + int i, sw = 0; if (reg & 1) return 0; /* invalid form: FRS/FRT must be even */ if (flags & SW) sw = 7; - ret = 0; + for (i = 0; i < 8; ++i) { if (!(flags & ST)) { - ret |= __get_user(ptr0[i^sw], addr + i); - ret |= __get_user(ptr1[i^sw], addr + i + 8); + if (__get_user_or_set_dar(regs, ptr0[i^sw], addr + i)) + return -EFAULT; + if (__get_user_or_set_dar(regs, ptr1[i^sw], addr + i + 8)) + return -EFAULT; } else { - ret |= __put_user(ptr0[i^sw], addr + i); - ret |= __put_user(ptr1[i^sw], addr + i + 8); + if (__put_user_or_set_dar(regs, ptr0[i^sw], addr + i)) + return -EFAULT; + if (__put_user_or_set_dar(regs, ptr1[i^sw], addr + i + 8)) + return -EFAULT; } } - if (ret) - return -EFAULT; + return 1; /* exception handled and fixed up */ } @@ -377,24 +403,27 @@ static int emulate_lq_stq(struct pt_regs *regs, unsigned char __user *addr, { char *ptr0 = (char *)®s->gpr[reg]; char *ptr1 = (char *)®s->gpr[reg+1]; - int i, ret, sw = 0; + int i, sw = 0; if (reg & 1) return 0; /* invalid form: GPR must be even */ if (flags & SW) sw = 7; - ret = 0; + for (i = 0; i < 8; ++i) { if (!(flags & ST)) { - ret |= __get_user(ptr0[i^sw], addr + i); - ret |= __get_user(ptr1[i^sw], addr + i + 8); + if (__get_user_or_set_dar(regs, ptr0[i^sw], addr + i)) + return -EFAULT; + if (__get_user_or_set_dar(regs, ptr1[i^sw], addr + i + 8)) + return -EFAULT; } else { - ret |= __put_user(ptr0[i^sw], addr + i); - ret |= __put_user(ptr1[i^sw], addr + i + 8); + if (__put_user_or_set_dar(regs, ptr0[i^sw], addr + i)) + return -EFAULT; + if (__put_user_or_set_dar(regs, ptr1[i^sw], addr + i + 8)) + return -EFAULT; } } - if (ret) - return -EFAULT; + return 1; /* exception handled and fixed up */ } #endif /* CONFIG_PPC64 */ @@ -687,9 +716,14 @@ static int emulate_vsx(unsigned char __user *addr, unsigned int reg, for (j = 0; j < length; j += elsize) { for (i = 0; i < elsize; ++i) { if (flags & ST) - ret |= __put_user(ptr[i^sw], addr + i); + ret = __put_user_or_set_dar(regs, ptr[i^sw], + addr + i); else - ret |= __get_user(ptr[i^sw], addr + i); + ret = __get_user_or_set_dar(regs, ptr[i^sw], + addr + i); + + if (ret) + return ret; } ptr += elsize; #ifdef __LITTLE_ENDIAN__ @@ -739,7 +773,7 @@ int fix_alignment(struct pt_regs *regs) unsigned int dsisr; unsigned char __user *addr; unsigned long p, swiz; - int ret, i; + int i; union data { u64 ll; double dd; @@ -936,7 +970,7 @@ int fix_alignment(struct pt_regs *regs) if (flags & F) { /* Special case for 16-byte FP loads and stores */ PPC_WARN_ALIGNMENT(fp_pair, regs); - return emulate_fp_pair(addr, reg, flags); + return emulate_fp_pair(regs, addr, reg, flags); } else { #ifdef CONFIG_PPC64 /* Special case for 16-byte loads and stores */ @@ -966,15 +1000,12 @@ int fix_alignment(struct pt_regs *regs) } data.ll = 0; - ret = 0; p = (unsigned long)addr; for (i = 0; i < nb; i++) - ret |= __get_user_inatomic(data.v[start + i], - SWIZ_PTR(p++)); - - if (unlikely(ret)) - return -EFAULT; + if (__get_user_or_set_dar(regs, data.v[start + i], + SWIZ_PTR(p++))) + return -EFAULT; } else if (flags & F) { data.ll = current->thread.TS_FPR(reg); @@ -1046,15 +1077,13 @@ int fix_alignment(struct pt_regs *regs) break; } - ret = 0; p = (unsigned long)addr; for (i = 0; i < nb; i++) - ret |= __put_user_inatomic(data.v[start + i], - SWIZ_PTR(p++)); + if (__put_user_or_set_dar(regs, data.v[start + i], + SWIZ_PTR(p++))) + return -EFAULT; - if (unlikely(ret)) - return -EFAULT; } else if (flags & F) current->thread.TS_FPR(reg) = data.ll; else diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 767ef6d68c9e..caa659671599 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -1235,10 +1235,14 @@ _GLOBAL(ftrace_caller) stdu r1,-SWITCH_FRAME_SIZE(r1) /* Save all gprs to pt_regs */ - SAVE_8GPRS(0,r1) - SAVE_8GPRS(8,r1) - SAVE_8GPRS(16,r1) - SAVE_8GPRS(24,r1) + SAVE_GPR(0, r1) + SAVE_10GPRS(2, r1) + SAVE_10GPRS(12, r1) + SAVE_10GPRS(22, r1) + + /* Save previous stack pointer (r1) */ + addi r8, r1, SWITCH_FRAME_SIZE + std r8, GPR1(r1) /* Load special regs for save below */ mfmsr r8 @@ -1292,10 +1296,10 @@ ftrace_call: #endif /* Restore gprs */ - REST_8GPRS(0,r1) - REST_8GPRS(8,r1) - REST_8GPRS(16,r1) - REST_8GPRS(24,r1) + REST_GPR(0,r1) + REST_10GPRS(2,r1) + REST_10GPRS(12,r1) + REST_10GPRS(22,r1) /* Restore callee's TOC */ ld r2, 24(r1) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 2e2fc1e37715..fd68e19b9ef7 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -764,7 +764,29 @@ EXC_REAL(program_check, 0x700, 0x800) EXC_VIRT(program_check, 0x4700, 0x4800, 0x700) TRAMP_KVM(PACA_EXGEN, 0x700) EXC_COMMON_BEGIN(program_check_common) - EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) + /* + * It's possible to receive a TM Bad Thing type program check with + * userspace register values (in particular r1), but with SRR1 reporting + * that we came from the kernel. Normally that would confuse the bad + * stack logic, and we would report a bad kernel stack pointer. Instead + * we switch to the emergency stack if we're taking a TM Bad Thing from + * the kernel. + */ + li r10,MSR_PR /* Build a mask of MSR_PR .. */ + oris r10,r10,0x200000@h /* .. and SRR1_PROGTM */ + and r10,r10,r12 /* Mask SRR1 with that. */ + srdi r10,r10,8 /* Shift it so we can compare */ + cmpldi r10,(0x200000 >> 8) /* .. with an immediate. */ + bne 1f /* If != go to normal path. */ + + /* SRR1 had PR=0 and SRR1_PROGTM=1, so use the emergency stack */ + andi. r10,r12,MSR_PR; /* Set CR0 correctly for label */ + /* 3 in EXCEPTION_PROLOG_COMMON */ + mr r10,r1 /* Save r1 */ + ld r1,PACAEMERGSP(r13) /* Use emergency stack */ + subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ + b 3f /* Jump into the macro !! */ +1: EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index dcbb9144c16d..d97370866a5f 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -131,7 +131,7 @@ static void flush_tmregs_to_thread(struct task_struct *tsk) * in the appropriate thread structures from live. */ - if (tsk != current) + if ((!cpu_has_feature(CPU_FTR_TM)) || (tsk != current)) return; if (MSR_TM_SUSPENDED(mfmsr())) { diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 96698fdf93b4..04e92257fd69 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -452,9 +452,20 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, if (MSR_TM_RESV(msr)) return -EINVAL; - /* pull in MSR TM from user context */ + /* pull in MSR TS bits from user context */ regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK); + /* + * Ensure that TM is enabled in regs->msr before we leave the signal + * handler. It could be the case that (a) user disabled the TM bit + * through the manipulation of the MSR bits in uc_mcontext or (b) the + * TM bit was disabled because a sufficient number of context switches + * happened whilst in the signal handler and load_tm overflowed, + * disabling the TM bit. In either case we can end up with an illegal + * TM state leading to a TM Bad Thing when we return to userspace. + */ + regs->msr |= MSR_TM; + /* pull in MSR LE from user context */ regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index c379ff5a4438..da2a7eccb10a 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -129,8 +129,11 @@ static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma) static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) { struct kvmppc_spapr_tce_table *stt = filp->private_data; + struct kvm *kvm = stt->kvm; + mutex_lock(&kvm->lock); list_del_rcu(&stt->list); + mutex_unlock(&kvm->lock); kvm_put_kvm(stt->kvm); @@ -150,6 +153,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce_64 *args) { struct kvmppc_spapr_tce_table *stt = NULL; + struct kvmppc_spapr_tce_table *siter; unsigned long npages, size; int ret = -ENOMEM; int i; @@ -157,24 +161,16 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, if (!args->size) return -EINVAL; - /* Check this LIOBN hasn't been previously allocated */ - list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { - if (stt->liobn == args->liobn) - return -EBUSY; - } - size = args->size; npages = kvmppc_tce_pages(size); ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); - if (ret) { - stt = NULL; - goto fail; - } + if (ret) + return ret; stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *), GFP_KERNEL); if (!stt) - goto fail; + goto fail_acct; stt->liobn = args->liobn; stt->page_shift = args->page_shift; @@ -188,24 +184,39 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, goto fail; } - kvm_get_kvm(kvm); - mutex_lock(&kvm->lock); - list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); + + /* Check this LIOBN hasn't been previously allocated */ + ret = 0; + list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) { + if (siter->liobn == args->liobn) { + ret = -EBUSY; + break; + } + } + + if (!ret) + ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, + stt, O_RDWR | O_CLOEXEC); + + if (ret >= 0) { + list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); + kvm_get_kvm(kvm); + } mutex_unlock(&kvm->lock); - return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, - stt, O_RDWR | O_CLOEXEC); + if (ret >= 0) + return ret; -fail: - if (stt) { - for (i = 0; i < npages; i++) - if (stt->pages[i]) - __free_page(stt->pages[i]); + fail: + for (i = 0; i < npages; i++) + if (stt->pages[i]) + __free_page(stt->pages[i]); - kfree(stt); - } + kfree(stt); + fail_acct: + kvmppc_account_memlimit(kvmppc_stt_pages(npages), false); return ret; } diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 4d0a4e5017c2..8e6dd17fe603 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -201,6 +201,10 @@ CNST_PMC_VAL(1) | CNST_PMC_VAL(2) | CNST_PMC_VAL(3) | \ CNST_PMC_VAL(4) | CNST_PMC_VAL(5) | CNST_PMC_VAL(6) | CNST_NC_VAL +/* + * Lets restrict use of PMC5 for instruction counting. + */ +#define P9_DD1_TEST_ADDER (ISA207_TEST_ADDER | CNST_PMC_VAL(5)) /* Bits in MMCR1 for PowerISA v2.07 */ #define MMCR1_UNIT_SHIFT(pmc) (60 - (4 * ((pmc) - 1))) diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 8e9a81967ff8..9abcd8f65504 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -295,7 +295,7 @@ static struct power_pmu power9_pmu = { .name = "POWER9", .n_counter = MAX_PMU_COUNTERS, .add_fields = ISA207_ADD_FIELDS, - .test_adder = ISA207_TEST_ADDER, + .test_adder = P9_DD1_TEST_ADDER, .compute_mmcr = isa207_compute_mmcr, .config_bhrb = power9_config_bhrb, .bhrb_filter_map = power9_bhrb_filter_map, diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index a560a98bcf3b..6a5e7467445c 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -225,8 +225,10 @@ static int add_dt_node(__be32 parent_phandle, __be32 drc_index) return -ENOENT; dn = dlpar_configure_connector(drc_index, parent_dn); - if (!dn) + if (!dn) { + of_node_put(parent_dn); return -ENOENT; + } rc = dlpar_attach_node(dn); if (rc) diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index bea785d7f853..af85d6b12028 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -5,6 +5,7 @@ #include typedef struct { + spinlock_t lock; cpumask_t cpu_attach_mask; atomic_t flush_count; unsigned int flush_mm; @@ -25,6 +26,7 @@ typedef struct { } mm_context_t; #define INIT_MM_CONTEXT(name) \ + .context.lock = __SPIN_LOCK_UNLOCKED(name.context.lock), \ .context.pgtable_lock = \ __SPIN_LOCK_UNLOCKED(name.context.pgtable_lock), \ .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 515fea5a3fc4..f65a708ac395 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -15,6 +15,7 @@ static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + spin_lock_init(&mm->context.lock); spin_lock_init(&mm->context.pgtable_lock); INIT_LIST_HEAD(&mm->context.pgtable_list); spin_lock_init(&mm->context.gmap_lock); @@ -93,7 +94,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, if (prev == next) return; cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); - cpumask_set_cpu(cpu, mm_cpumask(next)); /* Clear old ASCE by loading the kernel ASCE. */ __ctl_load(S390_lowcore.kernel_asce, 1, 1); __ctl_load(S390_lowcore.kernel_asce, 7, 7); @@ -111,9 +111,8 @@ static inline void finish_arch_post_lock_switch(void) preempt_disable(); while (atomic_read(&mm->context.flush_count)) cpu_relax(); - - if (mm->context.flush_mm) - __tlb_flush_mm(mm); + cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); + __tlb_flush_mm_lazy(mm); preempt_enable(); } set_fs(current->thread.mm_segment); @@ -126,6 +125,7 @@ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { switch_mm(prev, next, current); + cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); set_user_asce(next); } diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index d33f245af5e9..db74d398a443 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1359,7 +1359,9 @@ static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, static inline void pmdp_invalidate(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { - pmdp_xchg_direct(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID)); + pmd_t pmd = __pmd(pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID); + + pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd); } #define __HAVE_ARCH_PMDP_SET_WRPROTECT diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 39846100682a..eed927aeb08f 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -43,23 +43,6 @@ static inline void __tlb_flush_global(void) * Flush TLB entries for a specific mm on all CPUs (in case gmap is used * this implicates multiple ASCEs!). */ -static inline void __tlb_flush_full(struct mm_struct *mm) -{ - preempt_disable(); - atomic_inc(&mm->context.flush_count); - if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { - /* Local TLB flush */ - __tlb_flush_local(); - } else { - /* Global TLB flush */ - __tlb_flush_global(); - /* Reset TLB flush mask */ - cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); - } - atomic_dec(&mm->context.flush_count); - preempt_enable(); -} - static inline void __tlb_flush_mm(struct mm_struct *mm) { unsigned long gmap_asce; @@ -71,16 +54,18 @@ static inline void __tlb_flush_mm(struct mm_struct *mm) */ preempt_disable(); atomic_inc(&mm->context.flush_count); + /* Reset TLB flush mask */ + cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); + barrier(); gmap_asce = READ_ONCE(mm->context.gmap_asce); if (MACHINE_HAS_IDTE && gmap_asce != -1UL) { if (gmap_asce) __tlb_flush_idte(gmap_asce); __tlb_flush_idte(mm->context.asce); } else { - __tlb_flush_full(mm); + /* Global TLB flush */ + __tlb_flush_global(); } - /* Reset TLB flush mask */ - cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); atomic_dec(&mm->context.flush_count); preempt_enable(); } @@ -94,7 +79,6 @@ static inline void __tlb_flush_kernel(void) } #else #define __tlb_flush_global() __tlb_flush_local() -#define __tlb_flush_full(mm) __tlb_flush_local() /* * Flush TLB entries for a specific ASCE on all CPUs. @@ -112,10 +96,12 @@ static inline void __tlb_flush_kernel(void) static inline void __tlb_flush_mm_lazy(struct mm_struct * mm) { + spin_lock(&mm->context.lock); if (mm->context.flush_mm) { - __tlb_flush_mm(mm); mm->context.flush_mm = 0; + __tlb_flush_mm(mm); } + spin_unlock(&mm->context.lock); } /* diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 18d4107e10ee..97fc449a7470 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -56,13 +56,12 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { - unsigned long mask, result; struct page *head, *page; + unsigned long mask; int refs; - result = write ? 0 : _SEGMENT_ENTRY_PROTECT; - mask = result | _SEGMENT_ENTRY_INVALID; - if ((pmd_val(pmd) & mask) != result) + mask = (write ? _SEGMENT_ENTRY_PROTECT : 0) | _SEGMENT_ENTRY_INVALID; + if ((pmd_val(pmd) & mask) != 0) return 0; VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT)); diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h index be0cc1beed41..3fae200dd251 100644 --- a/arch/sparc/include/asm/setup.h +++ b/arch/sparc/include/asm/setup.h @@ -59,8 +59,11 @@ extern atomic_t dcpage_flushes; extern atomic_t dcpage_flushes_xcall; extern int sysctl_tsb_ratio; -#endif +#ifdef CONFIG_SERIAL_SUNHV +void sunhv_migrate_hvcons_irq(int cpu); +#endif +#endif void sun_do_break(void); extern int stop_a_enabled; extern int scons_pwroff; diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 2deb89ef1d5f..ca7cb8e57ab0 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1465,8 +1465,12 @@ void smp_send_stop(void) int cpu; if (tlb_type == hypervisor) { + int this_cpu = smp_processor_id(); +#ifdef CONFIG_SERIAL_SUNHV + sunhv_migrate_hvcons_irq(this_cpu); +#endif for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) + if (cpu == this_cpu) continue; #ifdef CONFIG_SUN_LDOMS if (ldom_domaining_enabled) { diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 75725dc4df7a..ad1abc1813da 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -11,6 +11,16 @@ else KBUILD_DEFCONFIG := $(ARCH)_defconfig endif +# For gcc stack alignment is specified with -mpreferred-stack-boundary, +# clang has the option -mstack-alignment for that purpose. +ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) + cc_stack_align4 := -mpreferred-stack-boundary=2 + cc_stack_align8 := -mpreferred-stack-boundary=3 +else ifneq ($(call cc-option, -mstack-alignment=16),) + cc_stack_align4 := -mstack-alignment=4 + cc_stack_align8 := -mstack-alignment=8 +endif + # How to compile the 16-bit code. Note we always compile for -march=i386; # that way we can complain to the user if the CPU is insufficient. # @@ -24,10 +34,11 @@ REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -D__KERNEL__ \ -DDISABLE_BRANCH_PROFILING \ -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \ -fno-strict-aliasing -fomit-frame-pointer -fno-pic \ - -mno-mmx -mno-sse \ - $(call cc-option, -ffreestanding) \ - $(call cc-option, -fno-stack-protector) \ - $(call cc-option, -mpreferred-stack-boundary=2) + -mno-mmx -mno-sse + +REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -ffreestanding) +REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -fno-stack-protector) +REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), $(cc_stack_align4)) export REALMODE_CFLAGS # BITS is used as extension for files which are available in a 32 bit @@ -64,8 +75,10 @@ ifeq ($(CONFIG_X86_32),y) # with nonstandard options KBUILD_CFLAGS += -fno-pic - # prevent gcc from keeping the stack 16 byte aligned - KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2) + # Align the stack to the register width instead of using the default + # alignment of 16 bytes. This reduces stack usage and the number of + # alignment instructions. + KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align4)) # Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use # a lot more stack due to the lack of sharing of stacklots: @@ -88,10 +101,10 @@ else KBUILD_CFLAGS += -m64 # Align jump targets to 1 byte, not the default 16 bytes: - KBUILD_CFLAGS += -falign-jumps=1 + KBUILD_CFLAGS += $(call cc-option,-falign-jumps=1) # Pack loops tightly as well: - KBUILD_CFLAGS += -falign-loops=1 + KBUILD_CFLAGS += $(call cc-option,-falign-loops=1) # Don't autogenerate traditional x87 instructions KBUILD_CFLAGS += $(call cc-option,-mno-80387) @@ -99,8 +112,14 @@ else KBUILD_CFLAGS += -fno-pic - # Use -mpreferred-stack-boundary=3 if supported. - KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3) + # By default gcc and clang use a stack alignment of 16 bytes for x86. + # However the standard kernel entry on x86-64 leaves the stack on an + # 8-byte boundary. If the compiler isn't informed about the actual + # alignment it will generate extra alignment instructions for the + # default alignment which keep the stack *mis*aligned. + # Furthermore an alignment to the register width reduces stack usage + # and the number of alignment instructions. + KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align8)) # Use -mskip-rax-setup if supported. KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup) diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 9e240fcba784..08dfce02362c 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -16,6 +16,15 @@ #include "ctype.h" #include "string.h" +/* + * Undef these macros so that the functions that we provide + * here will have the correct names regardless of how string.h + * may have chosen to #define them. + */ +#undef memcpy +#undef memset +#undef memcmp + int memcmp(const void *s1, const void *s2, size_t len) { bool diff; diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S index a916c4a61165..5f6a5af9c489 100644 --- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S +++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S @@ -65,7 +65,6 @@ #include #include -#define CONCAT(a,b) a##b #define VMOVDQ vmovdqu #define xdata0 %xmm0 @@ -92,8 +91,6 @@ #define num_bytes %r8 #define tmp %r10 -#define DDQ(i) CONCAT(ddq_add_,i) -#define XMM(i) CONCAT(%xmm, i) #define DDQ_DATA 0 #define XDATA 1 #define KEY_128 1 @@ -131,12 +128,12 @@ ddq_add_8: /* generate a unique variable for ddq_add_x */ .macro setddq n - var_ddq_add = DDQ(\n) + var_ddq_add = ddq_add_\n .endm /* generate a unique variable for xmm register */ .macro setxdata n - var_xdata = XMM(\n) + var_xdata = %xmm\n .endm /* club the numeric 'id' to the symbol 'name' */ diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index e7636bac7372..6c98821fef5e 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -62,8 +62,10 @@ #define new_len2 145f-144f /* - * max without conditionals. Idea adapted from: + * gas compatible max based on the idea from: * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax + * + * The additional "-" is needed because gas uses a "true" value of -1. */ #define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 1b020381ab38..ccbe24e697c4 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -103,12 +103,12 @@ static inline int alternatives_text_reserved(void *start, void *end) alt_end_marker ":\n" /* - * max without conditionals. Idea adapted from: + * gas compatible max based on the idea from: * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax * - * The additional "-" is needed because gas works with s32s. + * The additional "-" is needed because gas uses a "true" value of -1. */ -#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") - (" b ")))))" +#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))" /* * Pad the second replacement alternative with additional NOPs if it is @@ -218,10 +218,9 @@ static inline int alternatives_text_reserved(void *start, void *end) #define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \ output, input...) \ { \ - register void *__sp asm(_ASM_SP); \ asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\ "call %P[new2]", feature2) \ - : output, "+r" (__sp) \ + : output, ASM_CALL_CONSTRAINT \ : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ [new2] "i" (newfunc2), ## input); \ } diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 7acb51c49fec..be70795a4c1e 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -32,6 +32,7 @@ #define _ASM_ADD __ASM_SIZE(add) #define _ASM_SUB __ASM_SIZE(sub) #define _ASM_XADD __ASM_SIZE(xadd) +#define _ASM_MUL __ASM_SIZE(mul) #define _ASM_AX __ASM_REG(ax) #define _ASM_BX __ASM_REG(bx) @@ -125,4 +126,15 @@ /* For C file, we already have NOKPROBE_SYMBOL macro */ #endif +#ifndef __ASSEMBLY__ +/* + * This output constraint should be used for any inline asm which has a "call" + * instruction. Otherwise the asm may be inserted before the frame pointer + * gets set up by the containing function. If you forget to do this, objtool + * may print a "call without frame pointer save/setup" warning. + */ +register unsigned int __asm_call_sp asm("esp"); +#define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp) +#endif + #endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 19d14ac23ef9..fc3c7e49c8e4 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -296,6 +296,7 @@ struct x86_emulate_ctxt { bool perm_ok; /* do not check permissions if true */ bool ud; /* inject an #UD if host doesn't support insn */ + bool tf; /* TF value before instruction (after for syscall/sysret) */ bool have_exception; struct x86_exception exception; diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 0f400c0e4979..bc5902c9240b 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -459,8 +459,8 @@ int paravirt_disable_iospace(void); */ #ifdef CONFIG_X86_32 #define PVOP_VCALL_ARGS \ - unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx; \ - register void *__sp asm("esp") + unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx; + #define PVOP_CALL_ARGS PVOP_VCALL_ARGS #define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x)) @@ -480,8 +480,8 @@ int paravirt_disable_iospace(void); /* [re]ax isn't an arg, but the return val */ #define PVOP_VCALL_ARGS \ unsigned long __edi = __edi, __esi = __esi, \ - __edx = __edx, __ecx = __ecx, __eax = __eax; \ - register void *__sp asm("rsp") + __edx = __edx, __ecx = __ecx, __eax = __eax; + #define PVOP_CALL_ARGS PVOP_VCALL_ARGS #define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) @@ -520,7 +520,7 @@ int paravirt_disable_iospace(void); asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : call_clbr, "+r" (__sp) \ + : call_clbr, ASM_CALL_CONSTRAINT \ : paravirt_type(op), \ paravirt_clobber(clbr), \ ##__VA_ARGS__ \ @@ -530,7 +530,7 @@ int paravirt_disable_iospace(void); asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : call_clbr, "+r" (__sp) \ + : call_clbr, ASM_CALL_CONSTRAINT \ : paravirt_type(op), \ paravirt_clobber(clbr), \ ##__VA_ARGS__ \ @@ -557,7 +557,7 @@ int paravirt_disable_iospace(void); asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : call_clbr, "+r" (__sp) \ + : call_clbr, ASM_CALL_CONSTRAINT \ : paravirt_type(op), \ paravirt_clobber(clbr), \ ##__VA_ARGS__ \ diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 17f218645701..4939f6ed1a49 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -94,19 +94,14 @@ static __always_inline bool should_resched(int preempt_offset) #ifdef CONFIG_PREEMPT extern asmlinkage void ___preempt_schedule(void); -# define __preempt_schedule() \ -({ \ - register void *__sp asm(_ASM_SP); \ - asm volatile ("call ___preempt_schedule" : "+r"(__sp)); \ -}) +# define __preempt_schedule() \ + asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT) extern asmlinkage void preempt_schedule(void); extern asmlinkage void ___preempt_schedule_notrace(void); -# define __preempt_schedule_notrace() \ -({ \ - register void *__sp asm(_ASM_SP); \ - asm volatile ("call ___preempt_schedule_notrace" : "+r"(__sp)); \ -}) +# define __preempt_schedule_notrace() \ + asm volatile ("call ___preempt_schedule_notrace" : ASM_CALL_CONSTRAINT) + extern asmlinkage void preempt_schedule_notrace(void); #endif diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index a34e0d4b957d..7116b7931c7b 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -103,7 +103,6 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem) ({ \ long tmp; \ struct rw_semaphore* ret; \ - register void *__sp asm(_ASM_SP); \ \ asm volatile("# beginning down_write\n\t" \ LOCK_PREFIX " xadd %1,(%4)\n\t" \ @@ -114,7 +113,8 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem) " call " slow_path "\n" \ "1:\n" \ "# ending down_write" \ - : "+m" (sem->count), "=d" (tmp), "=a" (ret), "+r" (__sp) \ + : "+m" (sem->count), "=d" (tmp), \ + "=a" (ret), ASM_CALL_CONSTRAINT \ : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \ : "memory", "cc"); \ ret; \ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index a300aa10ebc5..c5bf13c382ea 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -157,11 +157,11 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) ({ \ int __ret_gu; \ register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \ - register void *__sp asm(_ASM_SP); \ __chk_user_ptr(ptr); \ might_fault(); \ asm volatile("call __get_user_%P4" \ - : "=a" (__ret_gu), "=r" (__val_gu), "+r" (__sp) \ + : "=a" (__ret_gu), "=r" (__val_gu), \ + ASM_CALL_CONSTRAINT \ : "0" (ptr), "i" (sizeof(*(ptr)))); \ (x) = (__force __typeof__(*(ptr))) __val_gu; \ __builtin_expect(__ret_gu, 0); \ diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 8b678af866f7..dc8a00a93f73 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -111,10 +111,9 @@ extern struct { char _entry[32]; } hypercall_page[]; register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \ register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \ register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \ - register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5; \ - register void *__sp asm(_ASM_SP); + register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5; -#define __HYPERCALL_0PARAM "=r" (__res), "+r" (__sp) +#define __HYPERCALL_0PARAM "=r" (__res), ASM_CALL_CONSTRAINT #define __HYPERCALL_1PARAM __HYPERCALL_0PARAM, "+r" (__arg1) #define __HYPERCALL_2PARAM __HYPERCALL_1PARAM, "+r" (__arg2) #define __HYPERCALL_3PARAM __HYPERCALL_2PARAM, "+r" (__arg3) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index d3e0d049a0c2..b89bef95f63b 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -176,10 +176,15 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled) return -EINVAL; } + if (!enabled) { + ++disabled_cpus; + return -EINVAL; + } + if (boot_cpu_physical_apicid != -1U) ver = boot_cpu_apic_version; - cpu = __generic_processor_info(id, ver, enabled); + cpu = generic_processor_info(id, ver); if (cpu >= 0) early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index f2234918e494..e2ead34da465 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2070,7 +2070,7 @@ static int allocate_logical_cpuid(int apicid) return nr_logical_cpuids++; } -int __generic_processor_info(int apicid, int version, bool enabled) +int generic_processor_info(int apicid, int version) { int cpu, max = nr_cpu_ids; bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid, @@ -2128,11 +2128,9 @@ int __generic_processor_info(int apicid, int version, bool enabled) if (num_processors >= nr_cpu_ids) { int thiscpu = max + disabled_cpus; - if (enabled) { - pr_warning("APIC: NR_CPUS/possible_cpus limit of %i " - "reached. Processor %d/0x%x ignored.\n", - max, thiscpu, apicid); - } + pr_warning("APIC: NR_CPUS/possible_cpus limit of %i " + "reached. Processor %d/0x%x ignored.\n", + max, thiscpu, apicid); disabled_cpus++; return -EINVAL; @@ -2184,23 +2182,13 @@ int __generic_processor_info(int apicid, int version, bool enabled) apic->x86_32_early_logical_apicid(cpu); #endif set_cpu_possible(cpu, true); - - if (enabled) { - num_processors++; - physid_set(apicid, phys_cpu_present_map); - set_cpu_present(cpu, true); - } else { - disabled_cpus++; - } + physid_set(apicid, phys_cpu_present_map); + set_cpu_present(cpu, true); + num_processors++; return cpu; } -int generic_processor_info(int apicid, int version) -{ - return __generic_processor_info(apicid, version, true); -} - int hard_smp_processor_id(void) { return read_apic_id(); diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index cdc0deab00c9..13dbcc0f9d03 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -34,6 +34,7 @@ #include #include +#include #include #include #include @@ -1046,6 +1047,18 @@ static int get_ucode_fw(void *to, const void *from, size_t n) return 0; } +static bool is_blacklisted(unsigned int cpu) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) { + pr_err_once("late loading on model 79 is disabled.\n"); + return true; + } + + return false; +} + static enum ucode_state request_microcode_fw(int cpu, struct device *device, bool refresh_fw) { @@ -1054,6 +1067,9 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device, const struct firmware *firmware; enum ucode_state ret; + if (is_blacklisted(cpu)) + return UCODE_NFOUND; + sprintf(name, "intel-ucode/%02x-%02x-%02x", c->x86, c->x86_model, c->x86_mask); @@ -1078,6 +1094,9 @@ static int get_ucode_user(void *to, const void *from, size_t n) static enum ucode_state request_microcode_user(int cpu, const void __user *buf, size_t size) { + if (is_blacklisted(cpu)) + return UCODE_NFOUND; + return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user); } diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index c114b132d121..7052d9a65fe9 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -130,11 +130,16 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, fpu__activate_fpstate_write(fpu); - if (boot_cpu_has(X86_FEATURE_XSAVES)) + if (boot_cpu_has(X86_FEATURE_XSAVES)) { ret = copyin_to_xsaves(kbuf, ubuf, xsave); - else + } else { ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + /* xcomp_bv must be 0 when using uncompacted format */ + if (!ret && xsave->header.xcomp_bv) + ret = -EINVAL; + } + /* * In case of failure, mark all states as init: */ diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index a184c210efba..3ec0d2d64601 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -329,6 +329,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) } else { err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size); + + /* xcomp_bv must be 0 when using uncompacted format */ + if (!err && state_size > offsetof(struct xregs_state, header) && fpu->state.xsave.header.xcomp_bv) + err = -EINVAL; } if (err || __copy_from_user(&env, buf, sizeof(env))) { diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 55ffd9dc2258..77f17cbfe271 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -141,7 +141,8 @@ void kvm_async_pf_task_wait(u32 token) n.token = token; n.cpu = smp_processor_id(); - n.halted = is_idle_task(current) || preempt_count() > 1; + n.halted = is_idle_task(current) || preempt_count() > 1 || + rcu_preempt_depth(); init_swait_queue_head(&n.wq); hlist_add_head(&n.link, &b->list); raw_spin_unlock(&b->lock); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index de36660751b5..9606a9d6b1a9 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2738,6 +2738,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF); } + ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0; return X86EMUL_CONTINUE; } @@ -5280,7 +5281,6 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) { - register void *__sp asm(_ASM_SP); ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; if (!(ctxt->d & ByteOp)) @@ -5288,7 +5288,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags), - [fastop]"+S"(fop), "+r"(__sp) + [fastop]"+S"(fop), ASM_CALL_CONSTRAINT : "c"(ctxt->src2.val)); ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 5f2412704b81..d29c745f10ad 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3648,13 +3648,6 @@ static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte) { - /* - * PT_PAGE_TABLE_LEVEL always terminates. The RHS has bit 7 set - * iff level <= PT_PAGE_TABLE_LEVEL, which for our purpose means - * level == PT_PAGE_TABLE_LEVEL; set PT_PAGE_SIZE_MASK in gpte then. - */ - gpte |= level - PT_PAGE_TABLE_LEVEL - 1; - /* * The RHS has bit 7 set iff level < mmu->last_nonleaf_level. * If it is clear, there are no large pages at this level, so clear @@ -3662,6 +3655,13 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu, */ gpte &= level - mmu->last_nonleaf_level; + /* + * PT_PAGE_TABLE_LEVEL always terminates. The RHS has bit 7 set + * iff level <= PT_PAGE_TABLE_LEVEL, which for our purpose means + * level == PT_PAGE_TABLE_LEVEL; set PT_PAGE_SIZE_MASK in gpte then. + */ + gpte |= level - PT_PAGE_TABLE_LEVEL - 1; + return gpte & PT_PAGE_SIZE_MASK; } @@ -4169,6 +4169,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly) update_permission_bitmask(vcpu, context, true); update_pkru_bitmask(vcpu, context, true); + update_last_nonleaf_level(vcpu, context); reset_rsvds_bits_mask_ept(vcpu, context, execonly); reset_ept_shadow_zero_bits_mask(vcpu, context, execonly); } diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index a01105485315..37363900297d 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -324,10 +324,11 @@ retry_walk: --walker->level; index = PT_INDEX(addr, walker->level); - table_gfn = gpte_to_gfn(pte); offset = index * sizeof(pt_element_t); pte_gpa = gfn_to_gpa(table_gfn) + offset; + + BUG_ON(walker->level < 1); walker->table_gfn[walker->level - 1] = table_gfn; walker->pte_gpa[walker->level - 1] = pte_gpa; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3dc6d8017ce9..315f609ed362 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2167,46 +2167,44 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) struct pi_desc old, new; unsigned int dest; - if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP) || - !kvm_vcpu_apicv_active(vcpu)) + /* + * In case of hot-plug or hot-unplug, we may have to undo + * vmx_vcpu_pi_put even if there is no assigned device. And we + * always keep PI.NDST up to date for simplicity: it makes the + * code easier, and CPU migration is not a fast path. + */ + if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu) return; + /* + * First handle the simple case where no cmpxchg is necessary; just + * allow posting non-urgent interrupts. + * + * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change + * PI.NDST: pi_post_block will do it for us and the wakeup_handler + * expects the VCPU to be on the blocked_vcpu_list that matches + * PI.NDST. + */ + if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || + vcpu->cpu == cpu) { + pi_clear_sn(pi_desc); + return; + } + + /* The full case. */ do { old.control = new.control = pi_desc->control; - /* - * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there - * are two possible cases: - * 1. After running 'pre_block', context switch - * happened. For this case, 'sn' was set in - * vmx_vcpu_put(), so we need to clear it here. - * 2. After running 'pre_block', we were blocked, - * and woken up by some other guy. For this case, - * we don't need to do anything, 'pi_post_block' - * will do everything for us. However, we cannot - * check whether it is case #1 or case #2 here - * (maybe, not needed), so we also clear sn here, - * I think it is not a big deal. - */ - if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) { - if (vcpu->cpu != cpu) { - dest = cpu_physical_id(cpu); + dest = cpu_physical_id(cpu); - if (x2apic_enabled()) - new.ndst = dest; - else - new.ndst = (dest << 8) & 0xFF00; - } + if (x2apic_enabled()) + new.ndst = dest; + else + new.ndst = (dest << 8) & 0xFF00; - /* set 'NV' to 'notification vector' */ - new.nv = POSTED_INTR_VECTOR; - } - - /* Allow posting non-urgent interrupts */ new.sn = 0; - } while (cmpxchg(&pi_desc->control, old.control, - new.control) != old.control); + } while (cmpxchg64(&pi_desc->control, old.control, + new.control) != old.control); } static void decache_tsc_multiplier(struct vcpu_vmx *vmx) @@ -4761,21 +4759,30 @@ static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) { #ifdef CONFIG_SMP if (vcpu->mode == IN_GUEST_MODE) { - struct vcpu_vmx *vmx = to_vmx(vcpu); - /* - * Currently, we don't support urgent interrupt, - * all interrupts are recognized as non-urgent - * interrupt, so we cannot post interrupts when - * 'SN' is set. + * The vector of interrupt to be delivered to vcpu had + * been set in PIR before this function. * - * If the vcpu is in guest mode, it means it is - * running instead of being scheduled out and - * waiting in the run queue, and that's the only - * case when 'SN' is set currently, warning if - * 'SN' is set. + * Following cases will be reached in this block, and + * we always send a notification event in all cases as + * explained below. + * + * Case 1: vcpu keeps in non-root mode. Sending a + * notification event posts the interrupt to vcpu. + * + * Case 2: vcpu exits to root mode and is still + * runnable. PIR will be synced to vIRR before the + * next vcpu entry. Sending a notification event in + * this case has no effect, as vcpu is not in root + * mode. + * + * Case 3: vcpu exits to root mode and is blocked. + * vcpu_block() has already synced PIR to vIRR and + * never blocks vcpu if vIRR is not cleared. Therefore, + * a blocked vcpu here does not wait for any requested + * interrupts in PIR, and sending a notification event + * which has no effect is safe here. */ - WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc)); apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), POSTED_INTR_VECTOR); @@ -8632,7 +8639,6 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) { u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - register void *__sp asm(_ASM_SP); /* * If external interrupt exists, IF bit is set in rflags/eflags on the @@ -8666,7 +8672,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) #ifdef CONFIG_X86_64 [sp]"=&r"(tmp), #endif - "+r"(__sp) + ASM_CALL_CONSTRAINT : [entry]"r"(entry), [ss]"i"(__KERNEL_DS), @@ -9187,6 +9193,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED; + /* + * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR + * or POSTED_INTR_WAKEUP_VECTOR. + */ + vmx->pi_desc.nv = POSTED_INTR_VECTOR; + vmx->pi_desc.sn = 1; + return &vmx->vcpu; free_vmcs: @@ -9996,6 +10009,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, page_to_phys(vmx->nested.virtual_apic_page)); vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); + } else { +#ifdef CONFIG_X86_64 + exec_control |= CPU_BASED_CR8_LOAD_EXITING | + CPU_BASED_CR8_STORE_EXITING; +#endif } if (cpu_has_vmx_msr_bitmap() && @@ -10671,7 +10689,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, * (KVM doesn't change it)- no reason to call set_cr4_guest_host_mask(); */ vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); - kvm_set_cr4(vcpu, vmcs12->host_cr4); + vmx_set_cr4(vcpu, vmcs12->host_cr4); nested_ept_uninit_mmu_context(vcpu); @@ -11000,6 +11018,37 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm, kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask); } +static void __pi_post_block(struct kvm_vcpu *vcpu) +{ + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + struct pi_desc old, new; + unsigned int dest; + + do { + old.control = new.control = pi_desc->control; + WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR, + "Wakeup handler not enabled while the VCPU is blocked\n"); + + dest = cpu_physical_id(vcpu->cpu); + + if (x2apic_enabled()) + new.ndst = dest; + else + new.ndst = (dest << 8) & 0xFF00; + + /* set 'NV' to 'notification vector' */ + new.nv = POSTED_INTR_VECTOR; + } while (cmpxchg64(&pi_desc->control, old.control, + new.control) != old.control); + + if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) { + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + list_del(&vcpu->blocked_vcpu_list); + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + vcpu->pre_pcpu = -1; + } +} + /* * This routine does the following things for vCPU which is going * to be blocked if VT-d PI is enabled. @@ -11015,7 +11064,6 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm, */ static int pi_pre_block(struct kvm_vcpu *vcpu) { - unsigned long flags; unsigned int dest; struct pi_desc old, new; struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); @@ -11025,34 +11073,20 @@ static int pi_pre_block(struct kvm_vcpu *vcpu) !kvm_vcpu_apicv_active(vcpu)) return 0; - vcpu->pre_pcpu = vcpu->cpu; - spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); - list_add_tail(&vcpu->blocked_vcpu_list, - &per_cpu(blocked_vcpu_on_cpu, - vcpu->pre_pcpu)); - spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); + WARN_ON(irqs_disabled()); + local_irq_disable(); + if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) { + vcpu->pre_pcpu = vcpu->cpu; + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + list_add_tail(&vcpu->blocked_vcpu_list, + &per_cpu(blocked_vcpu_on_cpu, + vcpu->pre_pcpu)); + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + } do { old.control = new.control = pi_desc->control; - /* - * We should not block the vCPU if - * an interrupt is posted for it. - */ - if (pi_test_on(pi_desc) == 1) { - spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); - list_del(&vcpu->blocked_vcpu_list); - spin_unlock_irqrestore( - &per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); - vcpu->pre_pcpu = -1; - - return 1; - } - WARN((pi_desc->sn == 1), "Warning: SN field of posted-interrupts " "is set before blocking\n"); @@ -11074,10 +11108,15 @@ static int pi_pre_block(struct kvm_vcpu *vcpu) /* set 'NV' to 'wakeup vector' */ new.nv = POSTED_INTR_WAKEUP_VECTOR; - } while (cmpxchg(&pi_desc->control, old.control, - new.control) != old.control); + } while (cmpxchg64(&pi_desc->control, old.control, + new.control) != old.control); - return 0; + /* We should not block the vCPU if an interrupt is posted for it. */ + if (pi_test_on(pi_desc) == 1) + __pi_post_block(vcpu); + + local_irq_enable(); + return (vcpu->pre_pcpu == -1); } static int vmx_pre_block(struct kvm_vcpu *vcpu) @@ -11093,44 +11132,13 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu) static void pi_post_block(struct kvm_vcpu *vcpu) { - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - struct pi_desc old, new; - unsigned int dest; - unsigned long flags; - - if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP) || - !kvm_vcpu_apicv_active(vcpu)) + if (vcpu->pre_pcpu == -1) return; - do { - old.control = new.control = pi_desc->control; - - dest = cpu_physical_id(vcpu->cpu); - - if (x2apic_enabled()) - new.ndst = dest; - else - new.ndst = (dest << 8) & 0xFF00; - - /* Allow posting non-urgent interrupts */ - new.sn = 0; - - /* set 'NV' to 'notification vector' */ - new.nv = POSTED_INTR_VECTOR; - } while (cmpxchg(&pi_desc->control, old.control, - new.control) != old.control); - - if(vcpu->pre_pcpu != -1) { - spin_lock_irqsave( - &per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); - list_del(&vcpu->blocked_vcpu_list); - spin_unlock_irqrestore( - &per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); - vcpu->pre_pcpu = -1; - } + WARN_ON(irqs_disabled()); + local_irq_disable(); + __pi_post_block(vcpu); + local_irq_enable(); } static void vmx_post_block(struct kvm_vcpu *vcpu) @@ -11158,7 +11166,7 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, struct kvm_lapic_irq irq; struct kvm_vcpu *vcpu; struct vcpu_data vcpu_info; - int idx, ret = -EINVAL; + int idx, ret = 0; if (!kvm_arch_has_assigned_device(kvm) || !irq_remapping_cap(IRQ_POSTING_CAP) || @@ -11167,7 +11175,12 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, idx = srcu_read_lock(&kvm->irq_srcu); irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - BUG_ON(guest_irq >= irq_rt->nr_rt_entries); + if (guest_irq >= irq_rt->nr_rt_entries || + hlist_empty(&irq_rt->map[guest_irq])) { + pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n", + guest_irq, irq_rt->nr_rt_entries); + goto out; + } hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) { if (e->type != KVM_IRQ_ROUTING_MSI) @@ -11210,12 +11223,8 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, if (set) ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); - else { - /* suppress notification event before unposting */ - pi_set_sn(vcpu_to_pi_desc(vcpu)); + else ret = irq_set_vcpu_affinity(host_irq, NULL); - pi_clear_sn(vcpu_to_pi_desc(vcpu)); - } if (ret < 0) { printk(KERN_INFO "%s: failed to update PI IRTE\n", diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3dbcb09c19cf..595f8149c0d9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5250,6 +5250,8 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); ctxt->eflags = kvm_get_rflags(vcpu); + ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0; + ctxt->eip = kvm_rip_read(vcpu); ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 : @@ -5465,37 +5467,26 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, return dr6; } -static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r) +static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r) { struct kvm_run *kvm_run = vcpu->run; - /* - * rflags is the old, "raw" value of the flags. The new value has - * not been saved yet. - * - * This is correct even for TF set by the guest, because "the - * processor will not generate this exception after the instruction - * that sets the TF flag". - */ - if (unlikely(rflags & X86_EFLAGS_TF)) { - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { - kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | - DR6_RTM; - kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; - kvm_run->debug.arch.exception = DB_VECTOR; - kvm_run->exit_reason = KVM_EXIT_DEBUG; - *r = EMULATE_USER_EXIT; - } else { - vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF; - /* - * "Certain debug exceptions may clear bit 0-3. The - * remaining contents of the DR6 register are never - * cleared by the processor". - */ - vcpu->arch.dr6 &= ~15; - vcpu->arch.dr6 |= DR6_BS | DR6_RTM; - kvm_queue_exception(vcpu, DB_VECTOR); - } + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { + kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM; + kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; + kvm_run->debug.arch.exception = DB_VECTOR; + kvm_run->exit_reason = KVM_EXIT_DEBUG; + *r = EMULATE_USER_EXIT; + } else { + vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF; + /* + * "Certain debug exceptions may clear bit 0-3. The + * remaining contents of the DR6 register are never + * cleared by the processor". + */ + vcpu->arch.dr6 &= ~15; + vcpu->arch.dr6 |= DR6_BS | DR6_RTM; + kvm_queue_exception(vcpu, DB_VECTOR); } } @@ -5650,8 +5641,9 @@ restart: toggle_interruptibility(vcpu, ctxt->interruptibility); vcpu->arch.emulate_regs_need_sync_to_vcpu = false; kvm_rip_write(vcpu, ctxt->eip); - if (r == EMULATE_DONE) - kvm_vcpu_check_singlestep(vcpu, rflags, &r); + if (r == EMULATE_DONE && + (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP))) + kvm_vcpu_do_singlestep(vcpu, &r); if (!ctxt->have_exception || exception_type(ctxt->exception.vector) == EXCPT_TRAP) __kvm_set_rflags(vcpu, ctxt->eflags); diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c index 121f59c6ee54..0c7fe444dcdd 100644 --- a/arch/x86/lib/kaslr.c +++ b/arch/x86/lib/kaslr.c @@ -5,6 +5,7 @@ * kernel starts. This file is included in the compressed kernel and * normally linked in the regular. */ +#include #include #include #include @@ -79,7 +80,7 @@ unsigned long kaslr_get_random_long(const char *purpose) } /* Circular multiply for better bit diffusion */ - asm("mul %3" + asm(_ASM_MUL "%3" : "=a" (random), "=d" (raw) : "a" (random), "rm" (mix_const)); random += raw; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 9f72ca3b2669..ee1e68656cb1 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -191,8 +191,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really * faulted on a pte with its pkey=4. */ -static void fill_sig_info_pkey(int si_code, siginfo_t *info, - struct vm_area_struct *vma) +static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey) { /* This is effectively an #ifdef */ if (!boot_cpu_has(X86_FEATURE_OSPKE)) @@ -208,7 +207,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info, * valid VMA, so we should never reach this without a * valid VMA. */ - if (!vma) { + if (!pkey) { WARN_ONCE(1, "PKU fault with no VMA passed in"); info->si_pkey = 0; return; @@ -218,13 +217,12 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info, * absolutely guranteed to be 100% accurate because of * the race explained above. */ - info->si_pkey = vma_pkey(vma); + info->si_pkey = *pkey; } static void force_sig_info_fault(int si_signo, int si_code, unsigned long address, - struct task_struct *tsk, struct vm_area_struct *vma, - int fault) + struct task_struct *tsk, u32 *pkey, int fault) { unsigned lsb = 0; siginfo_t info; @@ -239,7 +237,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address, lsb = PAGE_SHIFT; info.si_addr_lsb = lsb; - fill_sig_info_pkey(si_code, &info, vma); + fill_sig_info_pkey(si_code, &info, pkey); force_sig_info(si_signo, &info, tsk); } @@ -718,8 +716,6 @@ no_context(struct pt_regs *regs, unsigned long error_code, struct task_struct *tsk = current; unsigned long flags; int sig; - /* No context means no VMA to pass down */ - struct vm_area_struct *vma = NULL; /* Are we prepared to handle this kernel fault? */ if (fixup_exception(regs, X86_TRAP_PF)) { @@ -744,7 +740,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, /* XXX: hwpoison faults will set the wrong code. */ force_sig_info_fault(signal, si_code, address, - tsk, vma, 0); + tsk, NULL, 0); } /* @@ -762,7 +758,6 @@ no_context(struct pt_regs *regs, unsigned long error_code, if (is_vmalloc_addr((void *)address) && (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) || address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) { - register void *__sp asm("rsp"); unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *); /* * We're likely to be running with very little stack space @@ -777,7 +772,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, asm volatile ("movq %[stack], %%rsp\n\t" "call handle_stack_overflow\n\t" "1: jmp 1b" - : "+r" (__sp) + : ASM_CALL_CONSTRAINT : "D" ("kernel stack overflow (page fault)"), "S" (regs), "d" (address), [stack] "rm" (stack)); @@ -853,8 +848,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, static void __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, - unsigned long address, struct vm_area_struct *vma, - int si_code) + unsigned long address, u32 *pkey, int si_code) { struct task_struct *tsk = current; @@ -902,7 +896,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_PF; - force_sig_info_fault(SIGSEGV, si_code, address, tsk, vma, 0); + force_sig_info_fault(SIGSEGV, si_code, address, tsk, pkey, 0); return; } @@ -915,9 +909,9 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, static noinline void bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, - unsigned long address, struct vm_area_struct *vma) + unsigned long address, u32 *pkey) { - __bad_area_nosemaphore(regs, error_code, address, vma, SEGV_MAPERR); + __bad_area_nosemaphore(regs, error_code, address, pkey, SEGV_MAPERR); } static void @@ -925,6 +919,10 @@ __bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address, struct vm_area_struct *vma, int si_code) { struct mm_struct *mm = current->mm; + u32 pkey; + + if (vma) + pkey = vma_pkey(vma); /* * Something tried to access memory that isn't in our memory map.. @@ -932,7 +930,8 @@ __bad_area(struct pt_regs *regs, unsigned long error_code, */ up_read(&mm->mmap_sem); - __bad_area_nosemaphore(regs, error_code, address, vma, si_code); + __bad_area_nosemaphore(regs, error_code, address, + (vma) ? &pkey : NULL, si_code); } static noinline void @@ -975,7 +974,7 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code, static void do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, - struct vm_area_struct *vma, unsigned int fault) + u32 *pkey, unsigned int fault) { struct task_struct *tsk = current; int code = BUS_ADRERR; @@ -1002,13 +1001,12 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, code = BUS_MCEERR_AR; } #endif - force_sig_info_fault(SIGBUS, code, address, tsk, vma, fault); + force_sig_info_fault(SIGBUS, code, address, tsk, pkey, fault); } static noinline void mm_fault_error(struct pt_regs *regs, unsigned long error_code, - unsigned long address, struct vm_area_struct *vma, - unsigned int fault) + unsigned long address, u32 *pkey, unsigned int fault) { if (fatal_signal_pending(current) && !(error_code & PF_USER)) { no_context(regs, error_code, address, 0, 0); @@ -1032,9 +1030,9 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, } else { if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| VM_FAULT_HWPOISON_LARGE)) - do_sigbus(regs, error_code, address, vma, fault); + do_sigbus(regs, error_code, address, pkey, fault); else if (fault & VM_FAULT_SIGSEGV) - bad_area_nosemaphore(regs, error_code, address, vma); + bad_area_nosemaphore(regs, error_code, address, pkey); else BUG(); } @@ -1220,6 +1218,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, struct mm_struct *mm; int fault, major = 0; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + u32 pkey; tsk = current; mm = tsk->mm; @@ -1420,9 +1419,10 @@ good_area: return; } + pkey = vma_pkey(vma); up_read(&mm->mmap_sem); if (unlikely(fault & VM_FAULT_ERROR)) { - mm_fault_error(regs, error_code, address, vma, fault); + mm_fault_error(regs, error_code, address, &pkey, fault); return; } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 9a324fc8bed8..3e27ded6ac65 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -689,7 +689,7 @@ static void __meminit free_pagetable(struct page *page, int order) if (PageReserved(page)) { __ClearPageReserved(page); - magic = (unsigned long)page->lru.next; + magic = (unsigned long)page->freelist; if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) { while (nr_pages--) put_page_bootmem(page++); diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 555b9fa0ad43..7dbdb780264d 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -8,6 +8,7 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib targets += purgatory.ro +KASAN_SANITIZE := n KCOV_INSTRUMENT := n # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That diff --git a/block/bio.c b/block/bio.c index 655c9016052a..07f287b14cff 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1171,8 +1171,8 @@ struct bio *bio_copy_user_iov(struct request_queue *q, */ bmd->is_our_pages = map_data ? 0 : 1; memcpy(bmd->iov, iter->iov, sizeof(struct iovec) * iter->nr_segs); - iov_iter_init(&bmd->iter, iter->type, bmd->iov, - iter->nr_segs, iter->count); + bmd->iter = *iter; + bmd->iter.iov = bmd->iov; ret = -ENOMEM; bio = bio_kmalloc(gfp_mask, nr_pages); @@ -1266,6 +1266,7 @@ struct bio *bio_map_user_iov(struct request_queue *q, int ret, offset; struct iov_iter i; struct iovec iov; + struct bio_vec *bvec; iov_for_each(iov, i, *iter) { unsigned long uaddr = (unsigned long) iov.iov_base; @@ -1310,7 +1311,12 @@ struct bio *bio_map_user_iov(struct request_queue *q, ret = get_user_pages_fast(uaddr, local_nr_pages, (iter->type & WRITE) != WRITE, &pages[cur_page]); - if (ret < local_nr_pages) { + if (unlikely(ret < local_nr_pages)) { + for (j = cur_page; j < page_limit; j++) { + if (!pages[j]) + break; + put_page(pages[j]); + } ret = -EFAULT; goto out_unmap; } @@ -1318,6 +1324,7 @@ struct bio *bio_map_user_iov(struct request_queue *q, offset = offset_in_page(uaddr); for (j = cur_page; j < page_limit; j++) { unsigned int bytes = PAGE_SIZE - offset; + unsigned short prev_bi_vcnt = bio->bi_vcnt; if (len <= 0) break; @@ -1332,6 +1339,13 @@ struct bio *bio_map_user_iov(struct request_queue *q, bytes) break; + /* + * check if vector was merged with previous + * drop page reference if needed + */ + if (bio->bi_vcnt == prev_bi_vcnt) + put_page(pages[j]); + len -= bytes; offset = 0; } @@ -1364,10 +1378,8 @@ struct bio *bio_map_user_iov(struct request_queue *q, return bio; out_unmap: - for (j = 0; j < nr_pages; j++) { - if (!pages[j]) - break; - put_page(pages[j]); + bio_for_each_segment_all(bvec, bio, j) { + put_page(bvec->bv_page); } out: kfree(pages); diff --git a/block/blk-core.c b/block/blk-core.c index 710c93ba11fe..b232290f1068 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -235,7 +235,7 @@ EXPORT_SYMBOL(blk_start_queue_async); **/ void blk_start_queue(struct request_queue *q) { - WARN_ON(!irqs_disabled()); + WARN_ON(!in_interrupt() && !irqs_disabled()); queue_flag_clear(QUEUE_FLAG_STOPPED, q); __blk_run_queue(q); diff --git a/block/partitions/efi.c b/block/partitions/efi.c index bcd86e5cd546..39f70d968754 100644 --- a/block/partitions/efi.c +++ b/block/partitions/efi.c @@ -293,7 +293,7 @@ static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state, if (!gpt) return NULL; - count = le32_to_cpu(gpt->num_partition_entries) * + count = (size_t)le32_to_cpu(gpt->num_partition_entries) * le32_to_cpu(gpt->sizeof_partition_entry); if (!count) return NULL; @@ -352,7 +352,7 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba, gpt_header **gpt, gpt_entry **ptes) { u32 crc, origcrc; - u64 lastlba; + u64 lastlba, pt_size; if (!ptes) return 0; @@ -434,13 +434,20 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba, goto fail; } + /* Sanity check partition table size */ + pt_size = (u64)le32_to_cpu((*gpt)->num_partition_entries) * + le32_to_cpu((*gpt)->sizeof_partition_entry); + if (pt_size > KMALLOC_MAX_SIZE) { + pr_debug("GUID Partition Table is too large: %llu > %lu bytes\n", + (unsigned long long)pt_size, KMALLOC_MAX_SIZE); + goto fail; + } + if (!(*ptes = alloc_read_gpt_entries(state, *gpt))) goto fail; /* Check the GUID Partition Entry Array CRC */ - crc = efi_crc32((const unsigned char *) (*ptes), - le32_to_cpu((*gpt)->num_partition_entries) * - le32_to_cpu((*gpt)->sizeof_partition_entry)); + crc = efi_crc32((const unsigned char *) (*ptes), pt_size); if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) { pr_debug("GUID Partition Entry Array CRC check failed.\n"); diff --git a/crypto/Kconfig b/crypto/Kconfig index 84d71482bf08..fa98ad7edb60 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -360,6 +360,7 @@ config CRYPTO_XTS select CRYPTO_BLKCIPHER select CRYPTO_MANAGER select CRYPTO_GF128MUL + select CRYPTO_ECB help XTS: IEEE1619/D16 narrow block cipher use with aes-xts-plain, key size 256, 384 or 512 bits. This implementation currently diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 45af0fe00f33..aaf2f810d170 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -143,8 +143,10 @@ static int skcipher_alloc_sgl(struct sock *sk) sg_init_table(sgl->sg, MAX_SGL_ENTS + 1); sgl->cur = 0; - if (sg) + if (sg) { sg_chain(sg, MAX_SGL_ENTS + 1, sgl->sg); + sg_unmark_end(sg + (MAX_SGL_ENTS - 1)); + } list_add_tail(&sgl->list, &ctx->tsgl); } diff --git a/crypto/asymmetric_keys/pkcs7_parser.c b/crypto/asymmetric_keys/pkcs7_parser.c index af4cd8649117..d140d8bb2c96 100644 --- a/crypto/asymmetric_keys/pkcs7_parser.c +++ b/crypto/asymmetric_keys/pkcs7_parser.c @@ -88,6 +88,9 @@ static int pkcs7_check_authattrs(struct pkcs7_message *msg) bool want = false; sinfo = msg->signed_infos; + if (!sinfo) + goto inconsistent; + if (sinfo->authattrs) { want = true; msg->have_authattrs = true; diff --git a/crypto/drbg.c b/crypto/drbg.c index 8cac3d31a5f8..942ddff68408 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -1133,10 +1133,10 @@ static inline void drbg_dealloc_state(struct drbg_state *drbg) { if (!drbg) return; - kzfree(drbg->V); - drbg->Vbuf = NULL; - kzfree(drbg->C); - drbg->Cbuf = NULL; + kzfree(drbg->Vbuf); + drbg->V = NULL; + kzfree(drbg->Cbuf); + drbg->C = NULL; kzfree(drbg->scratchpadbuf); drbg->scratchpadbuf = NULL; drbg->reseed_ctr = 0; diff --git a/crypto/shash.c b/crypto/shash.c index a051541a4a17..4d8a671d1614 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -274,12 +274,14 @@ static int shash_async_finup(struct ahash_request *req) int shash_ahash_digest(struct ahash_request *req, struct shash_desc *desc) { - struct scatterlist *sg = req->src; - unsigned int offset = sg->offset; unsigned int nbytes = req->nbytes; + struct scatterlist *sg; + unsigned int offset; int err; - if (nbytes < min(sg->length, ((unsigned int)(PAGE_SIZE)) - offset)) { + if (nbytes && + (sg = req->src, offset = sg->offset, + nbytes < min(sg->length, ((unsigned int)(PAGE_SIZE)) - offset))) { void *data; data = kmap_atomic(sg_page(sg)); diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 1ac8008fb54d..2899534b4727 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -600,6 +600,8 @@ enum { * (protected by @proc->inner_lock) * @todo: list of work to do for this thread * (protected by @proc->inner_lock) + * @process_todo: whether work in @todo should be processed + * (protected by @proc->inner_lock) * @return_error: transaction errors reported by this thread * (only accessed by this thread) * @reply_error: transaction errors reported by target thread @@ -626,6 +628,7 @@ struct binder_thread { bool looper_need_return; /* can be written by other thread */ struct binder_transaction *transaction_stack; struct list_head todo; + bool process_todo; struct binder_error return_error; struct binder_error reply_error; wait_queue_head_t wait; @@ -813,6 +816,16 @@ static bool binder_worklist_empty(struct binder_proc *proc, return ret; } +/** + * binder_enqueue_work_ilocked() - Add an item to the work list + * @work: struct binder_work to add to list + * @target_list: list to add work to + * + * Adds the work to the specified list. Asserts that work + * is not already on a list. + * + * Requires the proc->inner_lock to be held. + */ static void binder_enqueue_work_ilocked(struct binder_work *work, struct list_head *target_list) @@ -823,22 +836,56 @@ binder_enqueue_work_ilocked(struct binder_work *work, } /** - * binder_enqueue_work() - Add an item to the work list - * @proc: binder_proc associated with list + * binder_enqueue_thread_work_ilocked_nowake() - Add thread work + * @thread: thread to queue work to * @work: struct binder_work to add to list - * @target_list: list to add work to * - * Adds the work to the specified list. Asserts that work - * is not already on a list. + * Adds the work to the todo list of the thread. Doesn't set the process_todo + * flag, which means that (if it wasn't already set) the thread will go to + * sleep without handling this work when it calls read. + * + * Requires the proc->inner_lock to be held. */ static void -binder_enqueue_work(struct binder_proc *proc, - struct binder_work *work, - struct list_head *target_list) +binder_enqueue_thread_work_ilocked_nowake(struct binder_thread *thread, + struct binder_work *work) { - binder_inner_proc_lock(proc); - binder_enqueue_work_ilocked(work, target_list); - binder_inner_proc_unlock(proc); + binder_enqueue_work_ilocked(work, &thread->todo); +} + +/** + * binder_enqueue_thread_work_ilocked() - Add an item to the thread work list + * @thread: thread to queue work to + * @work: struct binder_work to add to list + * + * Adds the work to the todo list of the thread, and enables processing + * of the todo queue. + * + * Requires the proc->inner_lock to be held. + */ +static void +binder_enqueue_thread_work_ilocked(struct binder_thread *thread, + struct binder_work *work) +{ + binder_enqueue_work_ilocked(work, &thread->todo); + thread->process_todo = true; +} + +/** + * binder_enqueue_thread_work() - Add an item to the thread work list + * @thread: thread to queue work to + * @work: struct binder_work to add to list + * + * Adds the work to the todo list of the thread, and enables processing + * of the todo queue. + */ +static void +binder_enqueue_thread_work(struct binder_thread *thread, + struct binder_work *work) +{ + binder_inner_proc_lock(thread->proc); + binder_enqueue_thread_work_ilocked(thread, work); + binder_inner_proc_unlock(thread->proc); } static void @@ -953,7 +1000,7 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd) static bool binder_has_work_ilocked(struct binder_thread *thread, bool do_proc_work) { - return !binder_worklist_empty_ilocked(&thread->todo) || + return thread->process_todo || thread->looper_need_return || (do_proc_work && !binder_worklist_empty_ilocked(&thread->proc->todo)); @@ -1153,6 +1200,10 @@ static void binder_do_set_priority(struct task_struct *task, task->pid, desired.prio, to_kernel_prio(policy, priority)); + trace_binder_set_priority(task->tgid, task->pid, task->normal_prio, + to_kernel_prio(policy, priority), + desired.prio); + /* Set the actual priority */ if (task->policy != policy || is_rt_policy(policy)) { struct sched_param params; @@ -1184,7 +1235,7 @@ static void binder_transaction_priority(struct task_struct *task, struct binder_priority node_prio, bool inherit_rt) { - struct binder_priority desired_prio; + struct binder_priority desired_prio = t->priority; if (t->set_priority_called) return; @@ -1196,9 +1247,6 @@ static void binder_transaction_priority(struct task_struct *task, if (!inherit_rt && is_rt_policy(desired_prio.sched_policy)) { desired_prio.prio = NICE_TO_PRIO(0); desired_prio.sched_policy = SCHED_NORMAL; - } else { - desired_prio.prio = t->priority.prio; - desired_prio.sched_policy = t->priority.sched_policy; } if (node_prio.prio < t->priority.prio || @@ -1301,7 +1349,7 @@ static struct binder_node *binder_init_node_ilocked( node->cookie = cookie; node->work.type = BINDER_WORK_NODE; priority = flags & FLAT_BINDER_FLAG_PRIORITY_MASK; - node->sched_policy = (flags & FLAT_BINDER_FLAG_PRIORITY_MASK) >> + node->sched_policy = (flags & FLAT_BINDER_FLAG_SCHED_POLICY_MASK) >> FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT; node->min_priority = to_kernel_prio(node->sched_policy, priority); node->accept_fds = !!(flags & FLAT_BINDER_FLAG_ACCEPTS_FDS); @@ -1369,6 +1417,17 @@ static int binder_inc_node_nilocked(struct binder_node *node, int strong, node->local_strong_refs++; if (!node->has_strong_ref && target_list) { binder_dequeue_work_ilocked(&node->work); + /* + * Note: this function is the only place where we queue + * directly to a thread->todo without using the + * corresponding binder_enqueue_thread_work() helper + * functions; in this case it's ok to not set the + * process_todo flag, since we know this node work will + * always be followed by other work that starts queue + * processing: in case of synchronous transactions, a + * BR_REPLY or BR_ERROR; in case of oneway + * transactions, a BR_TRANSACTION_COMPLETE. + */ binder_enqueue_work_ilocked(&node->work, target_list); } } else { @@ -1380,6 +1439,9 @@ static int binder_inc_node_nilocked(struct binder_node *node, int strong, node->debug_id); return -EINVAL; } + /* + * See comment above + */ binder_enqueue_work_ilocked(&node->work, target_list); } } @@ -2069,9 +2131,9 @@ static void binder_send_failed_reply(struct binder_transaction *t, binder_pop_transaction_ilocked(target_thread, t); if (target_thread->reply_error.cmd == BR_OK) { target_thread->reply_error.cmd = error_code; - binder_enqueue_work_ilocked( - &target_thread->reply_error.work, - &target_thread->todo); + binder_enqueue_thread_work_ilocked( + target_thread, + &target_thread->reply_error.work); wake_up_interruptible(&target_thread->wait); } else { WARN(1, "Unexpected reply error: %u\n", @@ -2101,6 +2163,26 @@ static void binder_send_failed_reply(struct binder_transaction *t, } } +/** + * binder_cleanup_transaction() - cleans up undelivered transaction + * @t: transaction that needs to be cleaned up + * @reason: reason the transaction wasn't delivered + * @error_code: error to return to caller (if synchronous call) + */ +static void binder_cleanup_transaction(struct binder_transaction *t, + const char *reason, + uint32_t error_code) +{ + if (t->buffer->target_node && !(t->flags & TF_ONE_WAY)) { + binder_send_failed_reply(t, error_code); + } else { + binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, + "undelivered transaction %d, %s\n", + t->debug_id, reason); + binder_free_transaction(t); + } +} + /** * binder_validate_object() - checks for a valid metadata object in a buffer. * @buffer: binder_buffer that we're parsing. @@ -2690,11 +2772,10 @@ static bool binder_proc_transaction(struct binder_transaction *t, struct binder_proc *proc, struct binder_thread *thread) { - struct list_head *target_list = NULL; struct binder_node *node = t->buffer->target_node; struct binder_priority node_prio; bool oneway = !!(t->flags & TF_ONE_WAY); - bool wakeup = true; + bool pending_async = false; BUG_ON(!node); binder_node_lock(node); @@ -2704,8 +2785,7 @@ static bool binder_proc_transaction(struct binder_transaction *t, if (oneway) { BUG_ON(thread); if (node->has_async_transaction) { - target_list = &node->async_todo; - wakeup = false; + pending_async = true; } else { node->has_async_transaction = 1; } @@ -2719,22 +2799,20 @@ static bool binder_proc_transaction(struct binder_transaction *t, return false; } - if (!thread && !target_list) + if (!thread && !pending_async) thread = binder_select_thread_ilocked(proc); if (thread) { - target_list = &thread->todo; binder_transaction_priority(thread->task, t, node_prio, node->inherit_rt); - } else if (!target_list) { - target_list = &proc->todo; + binder_enqueue_thread_work_ilocked(thread, &t->work); + } else if (!pending_async) { + binder_enqueue_work_ilocked(&t->work, &proc->todo); } else { - BUG_ON(target_list != &node->async_todo); + binder_enqueue_work_ilocked(&t->work, &node->async_todo); } - binder_enqueue_work_ilocked(&t->work, target_list); - - if (wakeup) + if (!pending_async) binder_wakeup_thread_ilocked(proc, thread, !oneway /* sync */); binder_inner_proc_unlock(proc); @@ -2743,6 +2821,48 @@ static bool binder_proc_transaction(struct binder_transaction *t, return true; } +/** + * binder_get_node_refs_for_txn() - Get required refs on node for txn + * @node: struct binder_node for which to get refs + * @proc: returns @node->proc if valid + * @error: if no @proc then returns BR_DEAD_REPLY + * + * User-space normally keeps the node alive when creating a transaction + * since it has a reference to the target. The local strong ref keeps it + * alive if the sending process dies before the target process processes + * the transaction. If the source process is malicious or has a reference + * counting bug, relying on the local strong ref can fail. + * + * Since user-space can cause the local strong ref to go away, we also take + * a tmpref on the node to ensure it survives while we are constructing + * the transaction. We also need a tmpref on the proc while we are + * constructing the transaction, so we take that here as well. + * + * Return: The target_node with refs taken or NULL if no @node->proc is NULL. + * Also sets @proc if valid. If the @node->proc is NULL indicating that the + * target proc has died, @error is set to BR_DEAD_REPLY + */ +static struct binder_node *binder_get_node_refs_for_txn( + struct binder_node *node, + struct binder_proc **procp, + uint32_t *error) +{ + struct binder_node *target_node = NULL; + + binder_node_inner_lock(node); + if (node->proc) { + target_node = node; + binder_inc_node_nilocked(node, 1, 0, NULL); + binder_inc_node_tmpref_ilocked(node); + node->proc->tmp_ref++; + *procp = node->proc; + } else + *error = BR_DEAD_REPLY; + binder_node_inner_unlock(node); + + return target_node; +} + static void binder_transaction(struct binder_proc *proc, struct binder_thread *thread, struct binder_transaction_data *tr, int reply, @@ -2845,43 +2965,35 @@ static void binder_transaction(struct binder_proc *proc, ref = binder_get_ref_olocked(proc, tr->target.handle, true); if (ref) { - binder_inc_node(ref->node, 1, 0, NULL); - target_node = ref->node; + target_node = binder_get_node_refs_for_txn( + ref->node, &target_proc, + &return_error); + } else { + binder_user_error("%d:%d got transaction to invalid handle\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; } binder_proc_unlock(proc); - if (target_node == NULL) { - binder_user_error("%d:%d got transaction to invalid handle\n", - proc->pid, thread->pid); - return_error = BR_FAILED_REPLY; - return_error_param = -EINVAL; - return_error_line = __LINE__; - goto err_invalid_target_handle; - } } else { mutex_lock(&context->context_mgr_node_lock); target_node = context->binder_context_mgr_node; - if (target_node == NULL) { + if (target_node) + target_node = binder_get_node_refs_for_txn( + target_node, &target_proc, + &return_error); + else return_error = BR_DEAD_REPLY; - mutex_unlock(&context->context_mgr_node_lock); - return_error_line = __LINE__; - goto err_no_context_mgr_node; - } - binder_inc_node(target_node, 1, 0, NULL); mutex_unlock(&context->context_mgr_node_lock); } - e->to_node = target_node->debug_id; - binder_node_lock(target_node); - target_proc = target_node->proc; - if (target_proc == NULL) { - binder_node_unlock(target_node); - return_error = BR_DEAD_REPLY; + if (!target_node) { + /* + * return_error is set above + */ + return_error_param = -EINVAL; return_error_line = __LINE__; goto err_dead_binder; } - binder_inner_proc_lock(target_proc); - target_proc->tmp_ref++; - binder_inner_proc_unlock(target_proc); - binder_node_unlock(target_node); + e->to_node = target_node->debug_id; if (security_binder_transaction(proc->tsk, target_proc->tsk) < 0) { return_error = BR_FAILED_REPLY; @@ -3202,10 +3314,10 @@ static void binder_transaction(struct binder_proc *proc, } } tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; - binder_enqueue_work(proc, tcomplete, &thread->todo); t->work.type = BINDER_WORK_TRANSACTION; if (reply) { + binder_enqueue_thread_work(thread, tcomplete); binder_inner_proc_lock(target_proc); if (target_thread->is_dead) { binder_inner_proc_unlock(target_proc); @@ -3213,7 +3325,7 @@ static void binder_transaction(struct binder_proc *proc, } BUG_ON(t->buffer->async_transaction != 0); binder_pop_transaction_ilocked(target_thread, in_reply_to); - binder_enqueue_work_ilocked(&t->work, &target_thread->todo); + binder_enqueue_thread_work_ilocked(target_thread, &t->work); binder_inner_proc_unlock(target_proc); wake_up_interruptible_sync(&target_thread->wait); binder_restore_priority(current, in_reply_to->saved_priority); @@ -3221,6 +3333,7 @@ static void binder_transaction(struct binder_proc *proc, } else if (!(t->flags & TF_ONE_WAY)) { BUG_ON(t->buffer->async_transaction != 0); binder_inner_proc_lock(proc); + binder_enqueue_thread_work_ilocked_nowake(thread, tcomplete); t->need_reply = 1; t->from_parent = thread->transaction_stack; thread->transaction_stack = t; @@ -3234,12 +3347,15 @@ static void binder_transaction(struct binder_proc *proc, } else { BUG_ON(target_node == NULL); BUG_ON(t->buffer->async_transaction != 1); + binder_enqueue_thread_work(thread, tcomplete); if (!binder_proc_transaction(t, target_proc, NULL)) goto err_dead_proc_or_thread; } if (target_thread) binder_thread_dec_tmpref(target_thread); binder_proc_dec_tmpref(target_proc); + if (target_node) + binder_dec_node_tmpref(target_node); /* * write barrier to synchronize with initialization * of log entry @@ -3259,6 +3375,8 @@ err_bad_parent: err_copy_data_failed: trace_binder_transaction_failed_buffer_release(t->buffer); binder_transaction_buffer_release(target_proc, t->buffer, offp); + if (target_node) + binder_dec_node_tmpref(target_node); target_node = NULL; t->buffer->transaction = NULL; binder_alloc_free_buf(&target_proc->alloc, t->buffer); @@ -3273,13 +3391,14 @@ err_bad_call_stack: err_empty_call_stack: err_dead_binder: err_invalid_target_handle: -err_no_context_mgr_node: if (target_thread) binder_thread_dec_tmpref(target_thread); if (target_proc) binder_proc_dec_tmpref(target_proc); - if (target_node) + if (target_node) { binder_dec_node(target_node, 1, 0); + binder_dec_node_tmpref(target_node); + } binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, "%d:%d transaction failed %d/%d, size %lld-%lld line %d\n", @@ -3308,15 +3427,11 @@ err_no_context_mgr_node: if (in_reply_to) { binder_restore_priority(current, in_reply_to->saved_priority); thread->return_error.cmd = BR_TRANSACTION_COMPLETE; - binder_enqueue_work(thread->proc, - &thread->return_error.work, - &thread->todo); + binder_enqueue_thread_work(thread, &thread->return_error.work); binder_send_failed_reply(in_reply_to, return_error); } else { thread->return_error.cmd = return_error; - binder_enqueue_work(thread->proc, - &thread->return_error.work, - &thread->todo); + binder_enqueue_thread_work(thread, &thread->return_error.work); } } @@ -3620,10 +3735,9 @@ static int binder_thread_write(struct binder_proc *proc, WARN_ON(thread->return_error.cmd != BR_OK); thread->return_error.cmd = BR_ERROR; - binder_enqueue_work( - thread->proc, - &thread->return_error.work, - &thread->todo); + binder_enqueue_thread_work( + thread, + &thread->return_error.work); binder_debug( BINDER_DEBUG_FAILED_TRANSACTION, "%d:%d BC_REQUEST_DEATH_NOTIFICATION failed\n", @@ -3703,9 +3817,9 @@ static int binder_thread_write(struct binder_proc *proc, if (thread->looper & (BINDER_LOOPER_STATE_REGISTERED | BINDER_LOOPER_STATE_ENTERED)) - binder_enqueue_work_ilocked( - &death->work, - &thread->todo); + binder_enqueue_thread_work_ilocked( + thread, + &death->work); else { binder_enqueue_work_ilocked( &death->work, @@ -3760,8 +3874,8 @@ static int binder_thread_write(struct binder_proc *proc, if (thread->looper & (BINDER_LOOPER_STATE_REGISTERED | BINDER_LOOPER_STATE_ENTERED)) - binder_enqueue_work_ilocked( - &death->work, &thread->todo); + binder_enqueue_thread_work_ilocked( + thread, &death->work); else { binder_enqueue_work_ilocked( &death->work, @@ -3935,6 +4049,8 @@ retry: break; } w = binder_dequeue_work_head_ilocked(list); + if (binder_worklist_empty_ilocked(&thread->todo)) + thread->process_todo = false; switch (w->type) { case BINDER_WORK_TRANSACTION: { @@ -4144,12 +4260,20 @@ retry: if (put_user(cmd, (uint32_t __user *)ptr)) { if (t_from) binder_thread_dec_tmpref(t_from); + + binder_cleanup_transaction(t, "put_user failed", + BR_FAILED_REPLY); + return -EFAULT; } ptr += sizeof(uint32_t); if (copy_to_user(ptr, &tr, sizeof(tr))) { if (t_from) binder_thread_dec_tmpref(t_from); + + binder_cleanup_transaction(t, "copy_to_user failed", + BR_FAILED_REPLY); + return -EFAULT; } ptr += sizeof(tr); @@ -4219,15 +4343,9 @@ static void binder_release_work(struct binder_proc *proc, struct binder_transaction *t; t = container_of(w, struct binder_transaction, work); - if (t->buffer->target_node && - !(t->flags & TF_ONE_WAY)) { - binder_send_failed_reply(t, BR_DEAD_REPLY); - } else { - binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, - "undelivered transaction %d\n", - t->debug_id); - binder_free_transaction(t); - } + + binder_cleanup_transaction(t, "process died.", + BR_DEAD_REPLY); } break; case BINDER_WORK_RETURN_ERROR: { struct binder_error *e = container_of( diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 905741129de7..1c76daacfb0b 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -281,6 +281,9 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, goto err_vm_insert_page_failed; } + if (index + 1 > alloc->pages_high) + alloc->pages_high = index + 1; + trace_binder_alloc_page_end(alloc, index); /* vm_insert_page does not seem to increment the refcount */ } @@ -560,7 +563,7 @@ static void binder_delete_free_buffer(struct binder_alloc *alloc, binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, "%d: merge free, buffer %pK do not share page with %pK or %pK\n", alloc->pid, buffer->data, - prev->data, next->data); + prev->data, next ? next->data : NULL); binder_update_page_range(alloc, 0, buffer_start_page(buffer), buffer_start_page(buffer) + PAGE_SIZE); } @@ -854,6 +857,7 @@ void binder_alloc_print_pages(struct seq_file *m, } mutex_unlock(&alloc->mutex); seq_printf(m, " pages: %d:%d:%d\n", active, lru, free); + seq_printf(m, " pages high watermark: %zu\n", alloc->pages_high); } /** @@ -984,7 +988,7 @@ binder_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) return ret; } -struct shrinker binder_shrinker = { +static struct shrinker binder_shrinker = { .count_objects = binder_shrink_count, .scan_objects = binder_shrink_scan, .seeks = DEFAULT_SEEKS, diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h index 2dd33b6df104..0b145307f1fd 100644 --- a/drivers/android/binder_alloc.h +++ b/drivers/android/binder_alloc.h @@ -92,6 +92,7 @@ struct binder_lru_page { * @pages: array of binder_lru_page * @buffer_size: size of address space specified via mmap * @pid: pid for associated binder_proc (invariant after init) + * @pages_high: high watermark of offset in @pages * * Bookkeeping structure for per-proc address space management for binder * buffers. It is normally initialized during binder_init() and binder_mmap() @@ -112,6 +113,7 @@ struct binder_alloc { size_t buffer_size; uint32_t buffer_free; int pid; + size_t pages_high; }; #ifdef CONFIG_ANDROID_BINDER_IPC_SELFTEST diff --git a/drivers/android/binder_trace.h b/drivers/android/binder_trace.h index 76e3b9c8a8a2..b11dffc521e8 100644 --- a/drivers/android/binder_trace.h +++ b/drivers/android/binder_trace.h @@ -85,6 +85,30 @@ DEFINE_BINDER_FUNCTION_RETURN_EVENT(binder_ioctl_done); DEFINE_BINDER_FUNCTION_RETURN_EVENT(binder_write_done); DEFINE_BINDER_FUNCTION_RETURN_EVENT(binder_read_done); +TRACE_EVENT(binder_set_priority, + TP_PROTO(int proc, int thread, unsigned int old_prio, + unsigned int desired_prio, unsigned int new_prio), + TP_ARGS(proc, thread, old_prio, new_prio, desired_prio), + + TP_STRUCT__entry( + __field(int, proc) + __field(int, thread) + __field(unsigned int, old_prio) + __field(unsigned int, new_prio) + __field(unsigned int, desired_prio) + ), + TP_fast_assign( + __entry->proc = proc; + __entry->thread = thread; + __entry->old_prio = old_prio; + __entry->new_prio = new_prio; + __entry->desired_prio = desired_prio; + ), + TP_printk("proc=%d thread=%d old=%d => new=%d desired=%d", + __entry->proc, __entry->thread, __entry->old_prio, + __entry->new_prio, __entry->desired_prio) +); + TRACE_EVENT(binder_wait_for_work, TP_PROTO(bool proc_work, bool transaction_stack, bool thread_todo), TP_ARGS(proc_work, transaction_stack, thread_todo), diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index 7ef16c085058..20e2b7ad8925 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -224,7 +224,6 @@ static DECLARE_TRANSPORT_CLASS(ata_port_class, static void ata_tport_release(struct device *dev) { - put_device(dev->parent); } /** @@ -284,7 +283,7 @@ int ata_tport_add(struct device *parent, device_initialize(dev); dev->type = &ata_port_type; - dev->parent = get_device(parent); + dev->parent = parent; dev->release = ata_tport_release; dev_set_name(dev, "ata%d", ap->print_id); transport_setup_device(dev); @@ -348,7 +347,6 @@ static DECLARE_TRANSPORT_CLASS(ata_link_class, static void ata_tlink_release(struct device *dev) { - put_device(dev->parent); } /** @@ -410,7 +408,7 @@ int ata_tlink_add(struct ata_link *link) int error; device_initialize(dev); - dev->parent = get_device(&ap->tdev); + dev->parent = &ap->tdev; dev->release = ata_tlink_release; if (ata_is_host_link(link)) dev_set_name(dev, "link%d", ap->print_id); @@ -589,7 +587,6 @@ static DECLARE_TRANSPORT_CLASS(ata_dev_class, static void ata_tdev_release(struct device *dev) { - put_device(dev->parent); } /** @@ -662,7 +659,7 @@ static int ata_tdev_add(struct ata_device *ata_dev) int error; device_initialize(dev); - dev->parent = get_device(&link->tdev); + dev->parent = &link->tdev; dev->release = ata_tdev_release; if (ata_is_host_link(link)) dev_set_name(dev, "dev%d.%d", ap->print_id,ata_dev->devno); diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c index 0636d84fbefe..f3f538eec7b3 100644 --- a/drivers/ata/sata_via.c +++ b/drivers/ata/sata_via.c @@ -644,14 +644,16 @@ static void svia_configure(struct pci_dev *pdev, int board_id, pci_write_config_byte(pdev, SATA_NATIVE_MODE, tmp8); } - /* enable IRQ on hotplug */ - pci_read_config_byte(pdev, SVIA_MISC_3, &tmp8); - if ((tmp8 & SATA_HOTPLUG) != SATA_HOTPLUG) { - dev_dbg(&pdev->dev, - "enabling SATA hotplug (0x%x)\n", - (int) tmp8); - tmp8 |= SATA_HOTPLUG; - pci_write_config_byte(pdev, SVIA_MISC_3, tmp8); + if (board_id == vt6421) { + /* enable IRQ on hotplug */ + pci_read_config_byte(pdev, SVIA_MISC_3, &tmp8); + if ((tmp8 & SATA_HOTPLUG) != SATA_HOTPLUG) { + dev_dbg(&pdev->dev, + "enabling SATA hotplug (0x%x)\n", + (int) tmp8); + tmp8 |= SATA_HOTPLUG; + pci_write_config_byte(pdev, SVIA_MISC_3, tmp8); + } } /* diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 5eba47815bb6..14ff40371f01 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -858,7 +858,8 @@ static ssize_t driver_override_store(struct device *dev, struct platform_device *pdev = to_platform_device(dev); char *driver_override, *old, *cp; - if (count > PATH_MAX) + /* We need to keep extra room for a newline */ + if (count >= (PAGE_SIZE - 1)) return -EINVAL; driver_override = kstrndup(buf, count, GFP_KERNEL); diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 4f991013f682..dc259d20c967 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1762,10 +1762,13 @@ void device_pm_check_callbacks(struct device *dev) { spin_lock_irq(&dev->power.lock); dev->power.no_pm_callbacks = - (!dev->bus || pm_ops_is_empty(dev->bus->pm)) && - (!dev->class || pm_ops_is_empty(dev->class->pm)) && + (!dev->bus || (pm_ops_is_empty(dev->bus->pm) && + !dev->bus->suspend && !dev->bus->resume)) && + (!dev->class || (pm_ops_is_empty(dev->class->pm) && + !dev->class->suspend && !dev->class->resume)) && (!dev->type || pm_ops_is_empty(dev->type->pm)) && (!dev->pm_domain || pm_ops_is_empty(&dev->pm_domain->ops)) && - (!dev->driver || pm_ops_is_empty(dev->driver->pm)); + (!dev->driver || (pm_ops_is_empty(dev->driver->pm) && + !dev->driver->suspend && !dev->driver->resume)); spin_unlock_irq(&dev->power.lock); } diff --git a/drivers/base/property.c b/drivers/base/property.c index 06f66687fe0b..7b313b567f4c 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -20,6 +20,7 @@ #include struct property_set { + struct device *dev; struct fwnode_handle fwnode; struct property_entry *properties; }; @@ -817,6 +818,7 @@ static struct property_set *pset_copy_set(const struct property_set *pset) void device_remove_properties(struct device *dev) { struct fwnode_handle *fwnode; + struct property_set *pset; fwnode = dev_fwnode(dev); if (!fwnode) @@ -826,16 +828,16 @@ void device_remove_properties(struct device *dev) * the pset. If there is no real firmware node (ACPI/DT) primary * will hold the pset. */ - if (is_pset_node(fwnode)) { + pset = to_pset_node(fwnode); + if (pset) { set_primary_fwnode(dev, NULL); - pset_free_set(to_pset_node(fwnode)); } else { - fwnode = fwnode->secondary; - if (!IS_ERR(fwnode) && is_pset_node(fwnode)) { + pset = to_pset_node(fwnode->secondary); + if (pset && dev == pset->dev) set_secondary_fwnode(dev, NULL); - pset_free_set(to_pset_node(fwnode)); - } } + if (pset && dev == pset->dev) + pset_free_set(pset); } EXPORT_SYMBOL_GPL(device_remove_properties); @@ -863,6 +865,7 @@ int device_add_properties(struct device *dev, struct property_entry *properties) p->fwnode.type = FWNODE_PDATA; set_secondary_fwnode(dev, &p->fwnode); + p->dev = dev; return 0; } EXPORT_SYMBOL_GPL(device_add_properties); diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index 3822eae102db..6f78cea75103 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -2163,6 +2163,9 @@ static void skd_send_fitmsg(struct skd_device *skdev, */ qcmd |= FIT_QCMD_MSGSIZE_64; + /* Make sure skd_msg_buf is written before the doorbell is triggered. */ + smp_wmb(); + SKD_WRITEQ(skdev, qcmd, FIT_Q_COMMAND); } @@ -2209,6 +2212,9 @@ static void skd_send_special_fitmsg(struct skd_device *skdev, qcmd = skspcl->mb_dma_address; qcmd |= FIT_QCMD_QID_NORMAL + FIT_QCMD_MSGSIZE_128; + /* Make sure skd_msg_buf is written before the doorbell is triggered. */ + smp_wmb(); + SKD_WRITEQ(skdev, qcmd, FIT_Q_COMMAND); } @@ -4622,15 +4628,16 @@ static void skd_free_disk(struct skd_device *skdev) { struct gendisk *disk = skdev->disk; - if (disk != NULL) { - struct request_queue *q = disk->queue; + if (disk && (disk->flags & GENHD_FL_UP)) + del_gendisk(disk); - if (disk->flags & GENHD_FL_UP) - del_gendisk(disk); - if (q) - blk_cleanup_queue(q); - put_disk(disk); + if (skdev->queue) { + blk_cleanup_queue(skdev->queue); + skdev->queue = NULL; + disk->queue = NULL; } + + put_disk(disk); skdev->disk = NULL; } diff --git a/drivers/bluetooth/btmrvl_sdio.c b/drivers/bluetooth/btmrvl_sdio.c index d02f2c14df32..c738baeb6d45 100644 --- a/drivers/bluetooth/btmrvl_sdio.c +++ b/drivers/bluetooth/btmrvl_sdio.c @@ -1682,8 +1682,12 @@ static int btmrvl_sdio_resume(struct device *dev) /* Disable platform specific wakeup interrupt */ if (card->plt_wake_cfg && card->plt_wake_cfg->irq_bt >= 0) { disable_irq_wake(card->plt_wake_cfg->irq_bt); - if (!card->plt_wake_cfg->wake_by_bt) - disable_irq(card->plt_wake_cfg->irq_bt); + disable_irq(card->plt_wake_cfg->irq_bt); + if (card->plt_wake_cfg->wake_by_bt) + /* Undo our disable, since interrupt handler already + * did this. + */ + enable_irq(card->plt_wake_cfg->irq_bt); } return 0; diff --git a/drivers/bus/mvebu-mbus.c b/drivers/bus/mvebu-mbus.c index c7f396903184..70db4d5638a6 100644 --- a/drivers/bus/mvebu-mbus.c +++ b/drivers/bus/mvebu-mbus.c @@ -720,7 +720,7 @@ mvebu_mbus_default_setup_cpu_target(struct mvebu_mbus_state *mbus) if (mbus->hw_io_coherency) w->mbus_attr |= ATTR_HW_COHERENCY; w->base = base & DDR_BASE_CS_LOW_MASK; - w->size = (size | ~DDR_SIZE_MASK) + 1; + w->size = (u64)(size | ~DDR_SIZE_MASK) + 1; } } mvebu_mbus_dram_info.num_cs = cs; diff --git a/drivers/clk/axs10x/i2s_pll_clock.c b/drivers/clk/axs10x/i2s_pll_clock.c index 411310d29581..02d3bcd6216c 100644 --- a/drivers/clk/axs10x/i2s_pll_clock.c +++ b/drivers/clk/axs10x/i2s_pll_clock.c @@ -182,6 +182,7 @@ static int i2s_pll_clk_probe(struct platform_device *pdev) if (IS_ERR(pll_clk->base)) return PTR_ERR(pll_clk->base); + memset(&init, 0, sizeof(init)); clk_name = node->name; init.name = clk_name; init.ops = &i2s_pll_ops; diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c index 21c427d86f28..a26c8a19fe93 100644 --- a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c +++ b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c @@ -803,6 +803,13 @@ static const struct sunxi_ccu_desc sun8i_h3_ccu_desc = { .num_resets = ARRAY_SIZE(sun8i_h3_ccu_resets), }; +static struct ccu_mux_nb sun8i_h3_cpu_nb = { + .common = &cpux_clk.common, + .cm = &cpux_clk.mux, + .delay_us = 1, /* > 8 clock cycles at 24 MHz */ + .bypass_index = 1, /* index of 24 MHz oscillator */ +}; + static void __init sun8i_h3_ccu_setup(struct device_node *node) { void __iomem *reg; @@ -821,6 +828,9 @@ static void __init sun8i_h3_ccu_setup(struct device_node *node) writel(val | (3 << 16), reg + SUN8I_H3_PLL_AUDIO_REG); sunxi_ccu_probe(node, reg, &sun8i_h3_ccu_desc); + + ccu_mux_notifier_register(pll_cpux_clk.common.hw.clk, + &sun8i_h3_cpu_nb); } CLK_OF_DECLARE(sun8i_h3_ccu, "allwinner,sun8i-h3-ccu", sun8i_h3_ccu_setup); diff --git a/drivers/clocksource/cs5535-clockevt.c b/drivers/clocksource/cs5535-clockevt.c index 9a7e37cf56b0..e1d7373e63e0 100644 --- a/drivers/clocksource/cs5535-clockevt.c +++ b/drivers/clocksource/cs5535-clockevt.c @@ -117,7 +117,8 @@ static irqreturn_t mfgpt_tick(int irq, void *dev_id) /* Turn off the clock (and clear the event) */ disable_timer(cs5535_event_clock); - if (clockevent_state_shutdown(&cs5535_clockevent)) + if (clockevent_state_detached(&cs5535_clockevent) || + clockevent_state_shutdown(&cs5535_clockevent)) return IRQ_HANDLED; /* Clear the counter */ diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index d89b8afe23b6..bc3917d6015a 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -244,7 +244,7 @@ config ARM_PXA2xx_CPUFREQ config ACPI_CPPC_CPUFREQ tristate "CPUFreq driver based on the ACPI CPPC spec" - depends on ACPI + depends on ACPI_PROCESSOR select ACPI_CPPC_LIB default n help diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 4d3ec92cbabf..e4cbfe52d4a0 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -280,6 +280,13 @@ static int cpufreq_init(struct cpufreq_policy *policy) policy->cpuinfo.transition_latency = transition_latency; + /* + * Android: set default parameters for parity between schedutil and + * schedfreq + */ + policy->up_transition_delay_us = transition_latency / NSEC_PER_USEC; + policy->down_transition_delay_us = 50000; /* 50ms */ + return 0; out_free_cpufreq_table: diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 80fa656da5ab..a59ae8e24d3d 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -609,6 +609,7 @@ static void intel_pstate_hwp_set_online_cpus(void) static int pid_param_set(void *data, u64 val) { *(u32 *)data = val; + pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC; intel_pstate_reset_all_pid(); return 0; } diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c index 58a4244b4752..3f26a415ef44 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c @@ -1,8 +1,9 @@ /* * AMD Cryptographic Coprocessor (CCP) AES XTS crypto API support * - * Copyright (C) 2013 Advanced Micro Devices, Inc. + * Copyright (C) 2013,2017 Advanced Micro Devices, Inc. * + * Author: Gary R Hook * Author: Tom Lendacky * * This program is free software; you can redistribute it and/or modify @@ -164,6 +165,7 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req, memset(&rctx->cmd, 0, sizeof(rctx->cmd)); INIT_LIST_HEAD(&rctx->cmd.entry); rctx->cmd.engine = CCP_ENGINE_XTS_AES_128; + rctx->cmd.u.xts.type = CCP_AES_TYPE_128; rctx->cmd.u.xts.action = (encrypt) ? CCP_AES_ACTION_ENCRYPT : CCP_AES_ACTION_DECRYPT; rctx->cmd.u.xts.unit_size = unit_size; diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c index 2c0ce5f605b3..17b19a68e269 100644 --- a/drivers/crypto/ccp/ccp-dev-v5.c +++ b/drivers/crypto/ccp/ccp-dev-v5.c @@ -131,6 +131,7 @@ union ccp_function { #define CCP_AES_MODE(p) ((p)->aes.mode) #define CCP_AES_TYPE(p) ((p)->aes.type) #define CCP_XTS_SIZE(p) ((p)->aes_xts.size) +#define CCP_XTS_TYPE(p) ((p)->aes_xts.type) #define CCP_XTS_ENCRYPT(p) ((p)->aes_xts.encrypt) #define CCP_SHA_TYPE(p) ((p)->sha.type) #define CCP_RSA_SIZE(p) ((p)->rsa.size) @@ -318,6 +319,7 @@ static int ccp5_perform_xts_aes(struct ccp_op *op) CCP5_CMD_PROT(&desc) = 0; function.raw = 0; + CCP_XTS_TYPE(&function) = op->u.xts.type; CCP_XTS_ENCRYPT(&function) = op->u.xts.action; CCP_XTS_SIZE(&function) = op->u.xts.unit_size; CCP5_CMD_FUNCTION(&desc) = function.raw; diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h index 8ac7ae17e1f4..e23c36c7691c 100644 --- a/drivers/crypto/ccp/ccp-dev.h +++ b/drivers/crypto/ccp/ccp-dev.h @@ -187,6 +187,7 @@ #define CCP_AES_CTX_SB_COUNT 1 #define CCP_XTS_AES_KEY_SB_COUNT 1 +#define CCP5_XTS_AES_KEY_SB_COUNT 2 #define CCP_XTS_AES_CTX_SB_COUNT 1 #define CCP_SHA_SB_COUNT 1 @@ -472,6 +473,7 @@ struct ccp_aes_op { }; struct ccp_xts_aes_op { + enum ccp_aes_type type; enum ccp_aes_action action; enum ccp_xts_aes_unit_size unit_size; }; diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c index 50fae4442801..64deb006c3be 100644 --- a/drivers/crypto/ccp/ccp-ops.c +++ b/drivers/crypto/ccp/ccp-ops.c @@ -779,6 +779,8 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_op op; unsigned int unit_size, dm_offset; bool in_place = false; + unsigned int sb_count; + enum ccp_aes_type aestype; int ret; switch (xts->unit_size) { @@ -802,7 +804,9 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, return -EINVAL; } - if (xts->key_len != AES_KEYSIZE_128) + if (xts->key_len == AES_KEYSIZE_128) + aestype = CCP_AES_TYPE_128; + else return -EINVAL; if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1))) @@ -824,23 +828,44 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, op.sb_key = cmd_q->sb_key; op.sb_ctx = cmd_q->sb_ctx; op.init = 1; + op.u.xts.type = aestype; op.u.xts.action = xts->action; op.u.xts.unit_size = xts->unit_size; - /* All supported key sizes fit in a single (32-byte) SB entry - * and must be in little endian format. Use the 256-bit byte - * swap passthru option to convert from big endian to little - * endian. + /* A version 3 device only supports 128-bit keys, which fits into a + * single SB entry. A version 5 device uses a 512-bit vector, so two + * SB entries. */ + if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) + sb_count = CCP_XTS_AES_KEY_SB_COUNT; + else + sb_count = CCP5_XTS_AES_KEY_SB_COUNT; ret = ccp_init_dm_workarea(&key, cmd_q, - CCP_XTS_AES_KEY_SB_COUNT * CCP_SB_BYTES, + sb_count * CCP_SB_BYTES, DMA_TO_DEVICE); if (ret) return ret; - dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128; - ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len); - ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len); + if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) { + /* All supported key sizes must be in little endian format. + * Use the 256-bit byte swap passthru option to convert from + * big endian to little endian. + */ + dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128; + ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len); + ccp_set_dm_area(&key, 0, xts->key, xts->key_len, xts->key_len); + } else { + /* Version 5 CCPs use a 512-bit space for the key: each portion + * occupies 256 bits, or one entire slot, and is zero-padded. + */ + unsigned int pad; + + dm_offset = CCP_SB_BYTES; + pad = dm_offset - xts->key_len; + ccp_set_dm_area(&key, pad, xts->key, 0, xts->key_len); + ccp_set_dm_area(&key, dm_offset + pad, xts->key, xts->key_len, + xts->key_len); + } ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key, CCP_PASSTHRU_BYTESWAP_256BIT); if (ret) { diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 571de2f284cf..e2d323fa2437 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1756,9 +1756,9 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, req_ctx->swinit = 0; } else { desc->ptr[1] = zero_entry; - /* Indicate next op is not the first. */ - req_ctx->first = 0; } + /* Indicate next op is not the first. */ + req_ctx->first = 0; /* HMAC key */ if (ctx->keylen) @@ -1769,7 +1769,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, sg_count = edesc->src_nents ?: 1; if (is_sec1 && sg_count > 1) - sg_copy_to_buffer(areq->src, sg_count, edesc->buf, length); + sg_copy_to_buffer(req_ctx->psrc, sg_count, edesc->buf, length); else sg_count = dma_map_sg(dev, req_ctx->psrc, sg_count, DMA_TO_DEVICE); @@ -3057,7 +3057,8 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, t_alg->algt.alg.hash.final = ahash_final; t_alg->algt.alg.hash.finup = ahash_finup; t_alg->algt.alg.hash.digest = ahash_digest; - t_alg->algt.alg.hash.setkey = ahash_setkey; + if (!strncmp(alg->cra_name, "hmac", 4)) + t_alg->algt.alg.hash.setkey = ahash_setkey; t_alg->algt.alg.hash.import = ahash_import; t_alg->algt.alg.hash.export = ahash_export; diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 7309c0824887..a2449d77af07 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -574,7 +574,7 @@ struct devfreq *devfreq_add_device(struct device *dev, err = device_register(&devfreq->dev); if (err) { mutex_unlock(&devfreq->lock); - goto err_out; + goto err_dev; } devfreq->trans_table = devm_kzalloc(&devfreq->dev, sizeof(unsigned int) * @@ -618,6 +618,9 @@ err_init: mutex_unlock(&devfreq_list_lock); device_unregister(&devfreq->dev); +err_dev: + if (devfreq) + kfree(devfreq); err_out: return ERR_PTR(err); } diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index 77242b37ef87..57962bff7532 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -1143,11 +1143,24 @@ static struct dma_async_tx_descriptor *edma_prep_dma_memcpy( struct edma_desc *edesc; struct device *dev = chan->device->dev; struct edma_chan *echan = to_edma_chan(chan); - unsigned int width, pset_len; + unsigned int width, pset_len, array_size; if (unlikely(!echan || !len)) return NULL; + /* Align the array size (acnt block) with the transfer properties */ + switch (__ffs((src | dest | len))) { + case 0: + array_size = SZ_32K - 1; + break; + case 1: + array_size = SZ_32K - 2; + break; + default: + array_size = SZ_32K - 4; + break; + } + if (len < SZ_64K) { /* * Transfer size less than 64K can be handled with one paRAM @@ -1169,7 +1182,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_memcpy( * When the full_length is multibple of 32767 one slot can be * used to complete the transfer. */ - width = SZ_32K - 1; + width = array_size; pset_len = rounddown(len, width); /* One slot is enough for lengths multiple of (SZ_32K -1) */ if (unlikely(pset_len == len)) @@ -1217,7 +1230,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_memcpy( } dest += pset_len; src += pset_len; - pset_len = width = len % (SZ_32K - 1); + pset_len = width = len % array_size; ret = edma_config_pset(chan, &edesc->pset[1], src, dest, 1, width, pset_len, DMA_MEM_TO_MEM); diff --git a/drivers/dma/ti-dma-crossbar.c b/drivers/dma/ti-dma-crossbar.c index 2403475a37cf..88a00d06def6 100644 --- a/drivers/dma/ti-dma-crossbar.c +++ b/drivers/dma/ti-dma-crossbar.c @@ -262,13 +262,14 @@ static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec, mutex_lock(&xbar->mutex); map->xbar_out = find_first_zero_bit(xbar->dma_inuse, xbar->dma_requests); - mutex_unlock(&xbar->mutex); if (map->xbar_out == xbar->dma_requests) { + mutex_unlock(&xbar->mutex); dev_err(&pdev->dev, "Run out of free DMA requests\n"); kfree(map); return ERR_PTR(-ENOMEM); } set_bit(map->xbar_out, xbar->dma_inuse); + mutex_unlock(&xbar->mutex); map->xbar_in = (u16)dma_spec->args[0]; diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index daaac2c79ca7..7db692ed3dea 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -981,20 +981,19 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) pr_cont("]: 0x%016llx\n", m->status); if (m->status & MCI_STATUS_ADDRV) - pr_emerg(HW_ERR "Error Addr: 0x%016llx", m->addr); + pr_emerg(HW_ERR "Error Addr: 0x%016llx\n", m->addr); if (boot_cpu_has(X86_FEATURE_SMCA)) { + pr_emerg(HW_ERR "IPID: 0x%016llx", m->ipid); + if (m->status & MCI_STATUS_SYNDV) pr_cont(", Syndrome: 0x%016llx", m->synd); - pr_cont(", IPID: 0x%016llx", m->ipid); - pr_cont("\n"); decode_smca_errors(m); goto err_code; - } else - pr_cont("\n"); + } if (!fam_ops) goto err_code; diff --git a/drivers/extcon/extcon-axp288.c b/drivers/extcon/extcon-axp288.c index 42f41e808292..27f67c28e700 100644 --- a/drivers/extcon/extcon-axp288.c +++ b/drivers/extcon/extcon-axp288.c @@ -168,7 +168,7 @@ static int axp288_handle_chrg_det_event(struct axp288_extcon_info *info) return ret; } - vbus_attach = (pwr_stat & PS_STAT_VBUS_PRESENT); + vbus_attach = (pwr_stat & PS_STAT_VBUS_VALID); if (!vbus_attach) goto notify_otg; diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 5e23e2d305e7..b12bc38dfa71 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -10,7 +10,7 @@ cflags-$(CONFIG_X86) += -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 \ -fPIC -fno-strict-aliasing -mno-red-zone \ -mno-mmx -mno-sse -cflags-$(CONFIG_ARM64) := $(subst -pg,,$(KBUILD_CFLAGS)) +cflags-$(CONFIG_ARM64) := $(subst -pg,,$(KBUILD_CFLAGS)) -fpie cflags-$(CONFIG_ARM) := $(subst -pg,,$(KBUILD_CFLAGS)) -g0 \ -fno-builtin -fpic -mno-single-pic-base diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index 993aa56755f6..7ec09ea81b94 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -18,8 +18,6 @@ #include "efistub.h" -bool __nokaslr; - static int efi_get_secureboot(efi_system_table_t *sys_table_arg) { static efi_char16_t const sb_var_name[] = { @@ -268,18 +266,6 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, goto fail; } - /* check whether 'nokaslr' was passed on the command line */ - if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { - static const u8 default_cmdline[] = CONFIG_CMDLINE; - const u8 *str, *cmdline = cmdline_ptr; - - if (IS_ENABLED(CONFIG_CMDLINE_FORCE)) - cmdline = default_cmdline; - str = strstr(cmdline, "nokaslr"); - if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) - __nokaslr = true; - } - si = setup_graphics(sys_table); status = handle_kernel_image(sys_table, image_addr, &image_size, @@ -291,9 +277,13 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, goto fail_free_cmdline; } - status = efi_parse_options(cmdline_ptr); - if (status != EFI_SUCCESS) - pr_efi_err(sys_table, "Failed to parse EFI cmdline options\n"); + if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) || + IS_ENABLED(CONFIG_CMDLINE_FORCE) || + cmdline_size == 0) + efi_parse_options(CONFIG_CMDLINE); + + if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && cmdline_size > 0) + efi_parse_options(cmdline_ptr); secure_boot = efi_get_secureboot(sys_table); if (secure_boot > 0) diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index eae693eb3e91..f7a6970e9abc 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -9,15 +9,21 @@ * published by the Free Software Foundation. * */ + +/* + * To prevent the compiler from emitting GOT-indirected (and thus absolute) + * references to the section markers, override their visibility as 'hidden' + */ +#pragma GCC visibility push(hidden) +#include +#pragma GCC visibility pop + #include #include -#include #include #include "efistub.h" -extern bool __nokaslr; - efi_status_t check_platform_features(efi_system_table_t *sys_table_arg) { u64 tg; @@ -52,7 +58,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table_arg, u64 phys_seed = 0; if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { - if (!__nokaslr) { + if (!nokaslr()) { status = efi_get_random_bytes(sys_table_arg, sizeof(phys_seed), (u8 *)&phys_seed); diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index aded10662020..2db141c84e76 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -41,6 +41,13 @@ static unsigned long __chunk_size = EFI_READ_CHUNK_SIZE; #define EFI_ALLOC_ALIGN EFI_PAGE_SIZE #endif +static int __section(.data) __nokaslr; + +int __pure nokaslr(void) +{ + return __nokaslr; +} + #define EFI_MMAP_NR_SLACK_SLOTS 8 struct file_info { @@ -351,10 +358,14 @@ void efi_free(efi_system_table_t *sys_table_arg, unsigned long size, * environments, first in the early boot environment of the EFI boot * stub, and subsequently during the kernel boot. */ -efi_status_t efi_parse_options(char *cmdline) +efi_status_t efi_parse_options(char const *cmdline) { char *str; + str = strstr(cmdline, "nokaslr"); + if (str == cmdline || (str && str > cmdline && *(str - 1) == ' ')) + __nokaslr = 1; + /* * If no EFI parameters were specified on the cmdline we've got * nothing to do. diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index fac67992bede..d0e5acab547f 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -15,6 +15,8 @@ */ #undef __init +extern int __pure nokaslr(void); + void efi_char16_printk(efi_system_table_t *, efi_char16_t *); efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, void *__image, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 264899df9bfc..05ff98b43c50 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -491,6 +491,9 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_ case TTM_PL_TT: break; case TTM_PL_VRAM: + if (mem->start == AMDGPU_BO_INVALID_OFFSET) + return -EINVAL; + mem->bus.offset = mem->start << PAGE_SHIFT; /* check if it's visible */ if ((mem->bus.offset + mem->bus.size) > adev->mc.visible_vram_size) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index a6a4b2b1c0d9..6a3470f84998 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -739,8 +739,10 @@ int kfd_wait_on_events(struct kfd_process *p, struct kfd_event_data event_data; if (copy_from_user(&event_data, &events[i], - sizeof(struct kfd_event_data))) + sizeof(struct kfd_event_data))) { + ret = -EFAULT; goto fail; + } ret = init_event_waiter(p, &event_waiters[i], event_data.event_id, i); diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index 82c193e5e0d6..afe0480d95c9 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -150,13 +150,8 @@ static void malidp_de_plane_update(struct drm_plane *plane, /* convert src values from Q16 fixed point to integer */ src_w = plane->state->src_w >> 16; src_h = plane->state->src_h >> 16; - if (plane->state->rotation & MALIDP_ROTATED_MASK) { - dest_w = plane->state->crtc_h; - dest_h = plane->state->crtc_w; - } else { - dest_w = plane->state->crtc_w; - dest_h = plane->state->crtc_h; - } + dest_w = plane->state->crtc_w; + dest_h = plane->state->crtc_h; malidp_hw_write(mp->hwdev, format_id, mp->layer->base); @@ -189,9 +184,9 @@ static void malidp_de_plane_update(struct drm_plane *plane, if (plane->state->rotation & DRM_ROTATE_MASK) val = ilog2(plane->state->rotation & DRM_ROTATE_MASK) << LAYER_ROT_OFFSET; if (plane->state->rotation & DRM_REFLECT_X) - val |= LAYER_V_FLIP; - if (plane->state->rotation & DRM_REFLECT_Y) val |= LAYER_H_FLIP; + if (plane->state->rotation & DRM_REFLECT_Y) + val |= LAYER_V_FLIP; /* set the 'enable layer' bit */ val |= LAYER_ENABLE; diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 4e16dffa1e2e..33778bf53590 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1871,10 +1871,10 @@ int drm_mode_atomic_ioctl(struct drm_device *dev, struct drm_atomic_state *state; struct drm_modeset_acquire_ctx ctx; struct drm_plane *plane; - struct drm_out_fence_state *fence_state = NULL; + struct drm_out_fence_state *fence_state; unsigned plane_mask; int ret = 0; - unsigned int i, j, num_fences = 0; + unsigned int i, j, num_fences; /* disallow for drivers not supporting atomic: */ if (!drm_core_check_feature(dev, DRIVER_ATOMIC)) @@ -1915,6 +1915,8 @@ retry: plane_mask = 0; copied_objs = 0; copied_props = 0; + fence_state = NULL; + num_fences = 0; for (i = 0; i < arg->count_objs; i++) { uint32_t obj_id, count_props; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index 0370b842d9cc..82dd57d4843c 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -549,12 +549,15 @@ static const struct etnaviv_gem_ops etnaviv_gem_shmem_ops = { void etnaviv_gem_free_object(struct drm_gem_object *obj) { struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); + struct etnaviv_drm_private *priv = obj->dev->dev_private; struct etnaviv_vram_mapping *mapping, *tmp; /* object should not be active */ WARN_ON(is_active(etnaviv_obj)); + mutex_lock(&priv->gem_lock); list_del(&etnaviv_obj->gem_node); + mutex_unlock(&priv->gem_lock); list_for_each_entry_safe(mapping, tmp, &etnaviv_obj->vram_list, obj_node) { diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 4ac36e3c341f..80c5cc5640c1 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1152,6 +1152,13 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, is_hdmi = is_dvi && (child->common.device_type & DEVICE_TYPE_NOT_HDMI_OUTPUT) == 0; is_edp = is_dp && (child->common.device_type & DEVICE_TYPE_INTERNAL_CONNECTOR); + if (port == PORT_A && is_dvi) { + DRM_DEBUG_KMS("VBT claims port A supports DVI%s, ignoring\n", + is_hdmi ? "/HDMI" : ""); + is_dvi = false; + is_hdmi = false; + } + info->supports_dvi = is_dvi; info->supports_hdmi = is_hdmi; info->supports_dp = is_dp; @@ -1212,7 +1219,7 @@ static void parse_ddi_ports(struct drm_i915_private *dev_priv, { enum port port; - if (!HAS_DDI(dev_priv)) + if (!HAS_DDI(dev_priv) && !IS_CHERRYVIEW(dev_priv)) return; if (!dev_priv->vbt.child_dev_num) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f8efd20e4a90..ce32303b3013 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11471,13 +11471,10 @@ struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev, { struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + enum transcoder cpu_transcoder; struct drm_display_mode *mode; struct intel_crtc_state *pipe_config; - int htot = I915_READ(HTOTAL(cpu_transcoder)); - int hsync = I915_READ(HSYNC(cpu_transcoder)); - int vtot = I915_READ(VTOTAL(cpu_transcoder)); - int vsync = I915_READ(VSYNC(cpu_transcoder)); + u32 htot, hsync, vtot, vsync; enum pipe pipe = intel_crtc->pipe; mode = kzalloc(sizeof(*mode), GFP_KERNEL); @@ -11505,6 +11502,13 @@ struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev, i9xx_crtc_clock_get(intel_crtc, pipe_config); mode->clock = pipe_config->port_clock / pipe_config->pixel_multiplier; + + cpu_transcoder = pipe_config->cpu_transcoder; + htot = I915_READ(HTOTAL(cpu_transcoder)); + hsync = I915_READ(HSYNC(cpu_transcoder)); + vtot = I915_READ(VTOTAL(cpu_transcoder)); + vsync = I915_READ(VSYNC(cpu_transcoder)); + mode->hdisplay = (htot & 0xffff) + 1; mode->htotal = ((htot & 0xffff0000) >> 16) + 1; mode->hsync_start = (hsync & 0xffff) + 1; diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 7b06280b23aa..afa3d010c650 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2193,8 +2193,8 @@ static void edp_panel_off(struct intel_dp *intel_dp) I915_WRITE(pp_ctrl_reg, pp); POSTING_READ(pp_ctrl_reg); - intel_dp->panel_power_off_time = ktime_get_boottime(); wait_panel_off(intel_dp); + intel_dp->panel_power_off_time = ktime_get_boottime(); /* We got a reference when we enabled the VDD. */ power_domain = intel_display_port_aux_power_domain(intel_encoder); diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index a2655cd5a84e..8ab6f30dc23c 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -272,8 +272,30 @@ static int intel_overlay_on(struct intel_overlay *overlay) return intel_overlay_do_wait_request(overlay, req, NULL); } +static void intel_overlay_flip_prepare(struct intel_overlay *overlay, + struct i915_vma *vma) +{ + enum pipe pipe = overlay->crtc->pipe; + + WARN_ON(overlay->old_vma); + + i915_gem_track_fb(overlay->vma ? overlay->vma->obj : NULL, + vma ? vma->obj : NULL, + INTEL_FRONTBUFFER_OVERLAY(pipe)); + + intel_frontbuffer_flip_prepare(overlay->i915, + INTEL_FRONTBUFFER_OVERLAY(pipe)); + + overlay->old_vma = overlay->vma; + if (vma) + overlay->vma = i915_vma_get(vma); + else + overlay->vma = NULL; +} + /* overlay needs to be enabled in OCMD reg */ static int intel_overlay_continue(struct intel_overlay *overlay, + struct i915_vma *vma, bool load_polyphase_filter) { struct drm_i915_private *dev_priv = overlay->i915; @@ -308,27 +330,35 @@ static int intel_overlay_continue(struct intel_overlay *overlay, intel_ring_emit(ring, flip_addr); intel_ring_advance(ring); + intel_overlay_flip_prepare(overlay, vma); + intel_overlay_submit_request(overlay, req, NULL); return 0; } +static void intel_overlay_release_old_vma(struct intel_overlay *overlay) +{ + struct i915_vma *vma; + + vma = fetch_and_zero(&overlay->old_vma); + if (WARN_ON(!vma)) + return; + + intel_frontbuffer_flip_complete(overlay->i915, + INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe)); + + i915_gem_object_unpin_from_display_plane(vma); + i915_vma_put(vma); +} + static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active, struct drm_i915_gem_request *req) { struct intel_overlay *overlay = container_of(active, typeof(*overlay), last_flip); - struct i915_vma *vma; - vma = fetch_and_zero(&overlay->old_vma); - if (WARN_ON(!vma)) - return; - - i915_gem_track_fb(vma->obj, NULL, - INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe)); - - i915_gem_object_unpin_from_display_plane(vma); - i915_vma_put(vma); + intel_overlay_release_old_vma(overlay); } static void intel_overlay_off_tail(struct i915_gem_active *active, @@ -336,15 +366,8 @@ static void intel_overlay_off_tail(struct i915_gem_active *active, { struct intel_overlay *overlay = container_of(active, typeof(*overlay), last_flip); - struct i915_vma *vma; - /* never have the overlay hw on without showing a frame */ - vma = fetch_and_zero(&overlay->vma); - if (WARN_ON(!vma)) - return; - - i915_gem_object_unpin_from_display_plane(vma); - i915_vma_put(vma); + intel_overlay_release_old_vma(overlay); overlay->crtc->overlay = NULL; overlay->crtc = NULL; @@ -398,6 +421,8 @@ static int intel_overlay_off(struct intel_overlay *overlay) } intel_ring_advance(ring); + intel_overlay_flip_prepare(overlay, NULL); + return intel_overlay_do_wait_request(overlay, req, intel_overlay_off_tail); } @@ -836,18 +861,10 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, intel_overlay_unmap_regs(overlay, regs); - ret = intel_overlay_continue(overlay, scale_changed); + ret = intel_overlay_continue(overlay, vma, scale_changed); if (ret) goto out_unpin; - i915_gem_track_fb(overlay->vma ? overlay->vma->obj : NULL, - vma->obj, INTEL_FRONTBUFFER_OVERLAY(pipe)); - - overlay->old_vma = overlay->vma; - overlay->vma = vma; - - intel_frontbuffer_flip(dev_priv, INTEL_FRONTBUFFER_OVERLAY(pipe)); - return 0; out_unpin: @@ -1215,6 +1232,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, mutex_unlock(&dev->struct_mutex); drm_modeset_unlock_all(dev); + i915_gem_object_put(new_bo); kfree(params); diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 9b307cee3008..dff478498f05 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -387,6 +387,13 @@ static bool intel_psr_match_conditions(struct intel_dp *intel_dp) return false; } + /* PSR2 is restricted to work with panel resolutions upto 3200x2000 */ + if (intel_crtc->config->pipe_src_w > 3200 || + intel_crtc->config->pipe_src_h > 2000) { + dev_priv->psr.psr2_support = false; + return false; + } + dev_priv->psr.source_ok = true; return true; } @@ -425,7 +432,6 @@ void intel_psr_enable(struct intel_dp *intel_dp) struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *crtc = to_intel_crtc(intel_dig_port->base.base.crtc); if (!HAS_PSR(dev)) { DRM_DEBUG_KMS("PSR not supported on this platform\n"); @@ -452,12 +458,7 @@ void intel_psr_enable(struct intel_dp *intel_dp) hsw_psr_setup_vsc(intel_dp); if (dev_priv->psr.psr2_support) { - /* PSR2 is restricted to work with panel resolutions upto 3200x2000 */ - if (crtc->config->pipe_src_w > 3200 || - crtc->config->pipe_src_h > 2000) - dev_priv->psr.psr2_support = false; - else - skl_psr_setup_su_vsc(intel_dp); + skl_psr_setup_su_vsc(intel_dp); } /* diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/bsp/g84.c b/drivers/gpu/drm/nouveau/nvkm/engine/bsp/g84.c index 8e2e24a74774..44e116f7880d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/bsp/g84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/bsp/g84.c @@ -39,5 +39,5 @@ int g84_bsp_new(struct nvkm_device *device, int index, struct nvkm_engine **pengine) { return nvkm_xtensa_new_(&g84_bsp, device, index, - true, 0x103000, pengine); + device->chipset != 0x92, 0x103000, pengine); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index 6584d505460c..133f89600279 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -1129,7 +1129,7 @@ gf100_gr_trap_intr(struct gf100_gr *gr) if (trap & 0x00000008) { u32 stat = nvkm_rd32(device, 0x408030); - nvkm_snprintbf(error, sizeof(error), gf100_m2mf_error, + nvkm_snprintbf(error, sizeof(error), gf100_ccache_error, stat & 0x3fffffff); nvkm_error(subdev, "CCACHE %08x [%s]\n", stat, error); nvkm_wr32(device, 0x408030, 0xc0000000); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c index 5df9669ea39c..240872a27c37 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c @@ -240,6 +240,8 @@ nvkm_vm_unmap_pgt(struct nvkm_vm *vm, int big, u32 fpde, u32 lpde) mmu->func->map_pgt(vpgd->obj, pde, vpgt->mem); } + mmu->func->flush(vm); + nvkm_memory_del(&pgt); } } diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 3b21ca5a6c81..82b01123c386 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1674,7 +1674,7 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, radeon_agp_suspend(rdev); pci_save_state(dev->pdev); - if (freeze && rdev->family >= CHIP_CEDAR) { + if (freeze && rdev->family >= CHIP_CEDAR && !(rdev->flags & RADEON_IS_IGP)) { rdev->asic->asic_reset(rdev, true); pci_restore_state(dev->pdev); } else if (suspend) { diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index c3b21865443e..1feec34ca9dd 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -47,6 +47,13 @@ static void sun4i_drv_disable_vblank(struct drm_device *drm, unsigned int pipe) sun4i_tcon_enable_vblank(tcon, false); } +static void sun4i_drv_lastclose(struct drm_device *dev) +{ + struct sun4i_drv *drv = dev->dev_private; + + drm_fbdev_cma_restore_mode(drv->fbdev); +} + static const struct file_operations sun4i_drv_fops = { .owner = THIS_MODULE, .open = drm_open, @@ -65,6 +72,7 @@ static struct drm_driver sun4i_drv_driver = { .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_PRIME | DRIVER_ATOMIC, /* Generic Operations */ + .lastclose = sun4i_drv_lastclose, .fops = &sun4i_drv_fops, .name = "sun4i-drm", .desc = "Allwinner sun4i Display Engine", diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index 8008e06b7efe..865e7c262322 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -604,7 +604,8 @@ static int i2c_hid_alloc_buffers(struct i2c_hid *ihid, size_t report_size) { /* the worst case is computed from the set_report command with a * reportID > 15 and the maximum report length */ - int args_len = sizeof(__u8) + /* optional ReportID byte */ + int args_len = sizeof(__u8) + /* ReportID */ + sizeof(__u8) + /* optional ReportID byte */ sizeof(__u16) + /* data register */ sizeof(__u16) + /* size of the report */ report_size; /* report */ diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index ae83af649a60..7838343eb37c 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -971,6 +971,8 @@ static int usbhid_parse(struct hid_device *hid) unsigned int rsize = 0; char *rdesc; int ret, n; + int num_descriptors; + size_t offset = offsetof(struct hid_descriptor, desc); quirks = usbhid_lookup_quirk(le16_to_cpu(dev->descriptor.idVendor), le16_to_cpu(dev->descriptor.idProduct)); @@ -993,10 +995,18 @@ static int usbhid_parse(struct hid_device *hid) return -ENODEV; } + if (hdesc->bLength < sizeof(struct hid_descriptor)) { + dbg_hid("hid descriptor is too short\n"); + return -EINVAL; + } + hid->version = le16_to_cpu(hdesc->bcdHID); hid->country = hdesc->bCountryCode; - for (n = 0; n < hdesc->bNumDescriptors; n++) + num_descriptors = min_t(int, hdesc->bNumDescriptors, + (hdesc->bLength - offset) / sizeof(struct hid_class_descriptor)); + + for (n = 0; n < num_descriptors; n++) if (hdesc->desc[n].bDescriptorType == HID_DT_REPORT) rsize = le16_to_cpu(hdesc->desc[n].wDescriptorLength); diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 0c535d0f3b95..d72dfb2bbdb8 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -611,8 +611,10 @@ static struct wacom_hdev_data *wacom_get_hdev_data(struct hid_device *hdev) /* Try to find an already-probed interface from the same device */ list_for_each_entry(data, &wacom_udev_list, list) { - if (compare_device_paths(hdev, data->dev, '/')) + if (compare_device_paths(hdev, data->dev, '/')) { + kref_get(&data->kref); return data; + } } /* Fallback to finding devices that appear to be "siblings" */ @@ -712,6 +714,9 @@ static int wacom_led_control(struct wacom *wacom) if (!wacom->led.groups) return -ENOTSUPP; + if (wacom->wacom_wac.features.type == REMOTE) + return -ENOTSUPP; + if (wacom->wacom_wac.pid) { /* wireless connected */ report_id = WAC_CMD_WL_LED_CONTROL; buf_size = 13; @@ -2433,6 +2438,8 @@ static void wacom_remove(struct hid_device *hdev) if (hdev->bus == BUS_BLUETOOTH) device_remove_file(&hdev->dev, &dev_attr_speed); + wacom_release_resources(wacom); + hid_set_drvdata(hdev, NULL); } diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index c6a922ee5d3b..db951c4fd6dd 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -559,8 +559,8 @@ static int wacom_intuos_pad(struct wacom_wac *wacom) keys = data[9] & 0x07; } } else { - buttons = ((data[6] & 0x10) << 10) | - ((data[5] & 0x10) << 9) | + buttons = ((data[6] & 0x10) << 5) | + ((data[5] & 0x10) << 4) | ((data[6] & 0x0F) << 4) | (data[5] & 0x0F); } diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c index e47d8c9db03a..75126e4e3f05 100644 --- a/drivers/hv/hv_fcopy.c +++ b/drivers/hv/hv_fcopy.c @@ -161,6 +161,10 @@ static void fcopy_send_data(struct work_struct *dummy) out_src = smsg_out; break; + case WRITE_TO_FILE: + out_src = fcopy_transaction.fcopy_msg; + out_len = sizeof(struct hv_do_fcopy); + break; default: out_src = fcopy_transaction.fcopy_msg; out_len = fcopy_transaction.recv_len; diff --git a/drivers/hwmon/gl520sm.c b/drivers/hwmon/gl520sm.c index dee93ec87d02..84e0994aafdd 100644 --- a/drivers/hwmon/gl520sm.c +++ b/drivers/hwmon/gl520sm.c @@ -208,11 +208,13 @@ static ssize_t get_cpu_vid(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR(cpu0_vid, S_IRUGO, get_cpu_vid, NULL); -#define VDD_FROM_REG(val) (((val) * 95 + 2) / 4) -#define VDD_TO_REG(val) clamp_val((((val) * 4 + 47) / 95), 0, 255) +#define VDD_FROM_REG(val) DIV_ROUND_CLOSEST((val) * 95, 4) +#define VDD_CLAMP(val) clamp_val(val, 0, 255 * 95 / 4) +#define VDD_TO_REG(val) DIV_ROUND_CLOSEST(VDD_CLAMP(val) * 4, 95) -#define IN_FROM_REG(val) ((val) * 19) -#define IN_TO_REG(val) clamp_val((((val) + 9) / 19), 0, 255) +#define IN_FROM_REG(val) ((val) * 19) +#define IN_CLAMP(val) clamp_val(val, 0, 255 * 19) +#define IN_TO_REG(val) DIV_ROUND_CLOSEST(IN_CLAMP(val), 19) static ssize_t get_in_input(struct device *dev, struct device_attribute *attr, char *buf) @@ -349,8 +351,13 @@ static SENSOR_DEVICE_ATTR(in4_max, S_IRUGO | S_IWUSR, #define DIV_FROM_REG(val) (1 << (val)) #define FAN_FROM_REG(val, div) ((val) == 0 ? 0 : (480000 / ((val) << (div)))) -#define FAN_TO_REG(val, div) ((val) <= 0 ? 0 : \ - clamp_val((480000 + ((val) << ((div)-1))) / ((val) << (div)), 1, 255)) + +#define FAN_BASE(div) (480000 >> (div)) +#define FAN_CLAMP(val, div) clamp_val(val, FAN_BASE(div) / 255, \ + FAN_BASE(div)) +#define FAN_TO_REG(val, div) ((val) == 0 ? 0 : \ + DIV_ROUND_CLOSEST(480000, \ + FAN_CLAMP(val, div) << (div))) static ssize_t get_fan_input(struct device *dev, struct device_attribute *attr, char *buf) @@ -513,9 +520,9 @@ static SENSOR_DEVICE_ATTR(fan2_div, S_IRUGO | S_IWUSR, static DEVICE_ATTR(fan1_off, S_IRUGO | S_IWUSR, get_fan_off, set_fan_off); -#define TEMP_FROM_REG(val) (((val) - 130) * 1000) -#define TEMP_TO_REG(val) clamp_val(((((val) < 0 ? \ - (val) - 500 : (val) + 500) / 1000) + 130), 0, 255) +#define TEMP_FROM_REG(val) (((val) - 130) * 1000) +#define TEMP_CLAMP(val) clamp_val(val, -130000, 125000) +#define TEMP_TO_REG(val) (DIV_ROUND_CLOSEST(TEMP_CLAMP(val), 1000) + 130) static ssize_t get_temp_input(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c index a6ea387b5b00..877a0ed76abf 100644 --- a/drivers/hwtracing/stm/core.c +++ b/drivers/hwtracing/stm/core.c @@ -1119,7 +1119,7 @@ void stm_source_unregister_device(struct stm_source_data *data) stm_source_link_drop(src); - device_destroy(&stm_source_class, src->dev.devt); + device_unregister(&src->dev); } EXPORT_SYMBOL_GPL(stm_source_unregister_device); diff --git a/drivers/i2c/busses/i2c-at91.c b/drivers/i2c/busses/i2c-at91.c index 0b86c6173e07..c925a690cb32 100644 --- a/drivers/i2c/busses/i2c-at91.c +++ b/drivers/i2c/busses/i2c-at91.c @@ -1180,6 +1180,7 @@ static int at91_twi_suspend_noirq(struct device *dev) static int at91_twi_resume_noirq(struct device *dev) { + struct at91_twi_dev *twi_dev = dev_get_drvdata(dev); int ret; if (!pm_runtime_status_suspended(dev)) { @@ -1191,6 +1192,8 @@ static int at91_twi_resume_noirq(struct device *dev) pm_runtime_mark_last_busy(dev); pm_request_autosuspend(dev); + at91_init_twi_bus(twi_dev); + return 0; } diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index 8477292e92c8..7aea28815d99 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -340,12 +340,15 @@ static int ismt_process_desc(const struct ismt_desc *desc, data->word = dma_buffer[0] | (dma_buffer[1] << 8); break; case I2C_SMBUS_BLOCK_DATA: - case I2C_SMBUS_I2C_BLOCK_DATA: if (desc->rxbytes != dma_buffer[0] + 1) return -EMSGSIZE; memcpy(data->block, dma_buffer, desc->rxbytes); break; + case I2C_SMBUS_I2C_BLOCK_DATA: + memcpy(&data->block[1], dma_buffer, desc->rxbytes); + data->block[0] = desc->rxbytes; + break; } return 0; } diff --git a/drivers/i2c/busses/i2c-meson.c b/drivers/i2c/busses/i2c-meson.c index 2aa61bbbd307..73b97c71a484 100644 --- a/drivers/i2c/busses/i2c-meson.c +++ b/drivers/i2c/busses/i2c-meson.c @@ -175,7 +175,7 @@ static void meson_i2c_put_data(struct meson_i2c *i2c, char *buf, int len) wdata1 |= *buf++ << ((i - 4) * 8); writel(wdata0, i2c->regs + REG_TOK_WDATA0); - writel(wdata0, i2c->regs + REG_TOK_WDATA1); + writel(wdata1, i2c->regs + REG_TOK_WDATA1); dev_dbg(i2c->dev, "%s: data %08x %08x len %d\n", __func__, wdata0, wdata1, len); diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index c21ca7bf2efe..8f1c5f24c1df 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -94,6 +94,12 @@ #define SB800_PIIX4_PORT_IDX_ALT 0x2e #define SB800_PIIX4_PORT_IDX_SEL 0x2f #define SB800_PIIX4_PORT_IDX_MASK 0x06 +#define SB800_PIIX4_PORT_IDX_SHIFT 1 + +/* On kerncz, SmBus0Sel is at bit 20:19 of PMx00 DecodeEn */ +#define SB800_PIIX4_PORT_IDX_KERNCZ 0x02 +#define SB800_PIIX4_PORT_IDX_MASK_KERNCZ 0x18 +#define SB800_PIIX4_PORT_IDX_SHIFT_KERNCZ 3 /* insmod parameters */ @@ -149,6 +155,8 @@ static const struct dmi_system_id piix4_dmi_ibm[] = { */ static DEFINE_MUTEX(piix4_mutex_sb800); static u8 piix4_port_sel_sb800; +static u8 piix4_port_mask_sb800; +static u8 piix4_port_shift_sb800; static const char *piix4_main_port_names_sb800[PIIX4_MAX_ADAPTERS] = { " port 0", " port 2", " port 3", " port 4" }; @@ -347,7 +355,19 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev, /* Find which register is used for port selection */ if (PIIX4_dev->vendor == PCI_VENDOR_ID_AMD) { - piix4_port_sel_sb800 = SB800_PIIX4_PORT_IDX_ALT; + switch (PIIX4_dev->device) { + case PCI_DEVICE_ID_AMD_KERNCZ_SMBUS: + piix4_port_sel_sb800 = SB800_PIIX4_PORT_IDX_KERNCZ; + piix4_port_mask_sb800 = SB800_PIIX4_PORT_IDX_MASK_KERNCZ; + piix4_port_shift_sb800 = SB800_PIIX4_PORT_IDX_SHIFT_KERNCZ; + break; + case PCI_DEVICE_ID_AMD_HUDSON2_SMBUS: + default: + piix4_port_sel_sb800 = SB800_PIIX4_PORT_IDX_ALT; + piix4_port_mask_sb800 = SB800_PIIX4_PORT_IDX_MASK; + piix4_port_shift_sb800 = SB800_PIIX4_PORT_IDX_SHIFT; + break; + } } else { mutex_lock(&piix4_mutex_sb800); outb_p(SB800_PIIX4_PORT_IDX_SEL, SB800_PIIX4_SMB_IDX); @@ -355,6 +375,8 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev, piix4_port_sel_sb800 = (port_sel & 0x01) ? SB800_PIIX4_PORT_IDX_ALT : SB800_PIIX4_PORT_IDX; + piix4_port_mask_sb800 = SB800_PIIX4_PORT_IDX_MASK; + piix4_port_shift_sb800 = SB800_PIIX4_PORT_IDX_SHIFT; mutex_unlock(&piix4_mutex_sb800); } @@ -616,8 +638,8 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr, smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1); port = adapdata->port; - if ((smba_en_lo & SB800_PIIX4_PORT_IDX_MASK) != port) - outb_p((smba_en_lo & ~SB800_PIIX4_PORT_IDX_MASK) | port, + if ((smba_en_lo & piix4_port_mask_sb800) != port) + outb_p((smba_en_lo & ~piix4_port_mask_sb800) | port, SB800_PIIX4_SMB_IDX + 1); retval = piix4_access(adap, addr, flags, read_write, @@ -706,7 +728,7 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba, adapdata->smba = smba; adapdata->sb800_main = sb800_main; - adapdata->port = port << 1; + adapdata->port = port << piix4_port_shift_sb800; /* set up the sysfs linkage to our parent device */ adap->dev.parent = &dev->dev; diff --git a/drivers/iio/adc/ad7793.c b/drivers/iio/adc/ad7793.c index e6706a09e100..47c3d7f32900 100644 --- a/drivers/iio/adc/ad7793.c +++ b/drivers/iio/adc/ad7793.c @@ -257,7 +257,7 @@ static int ad7793_setup(struct iio_dev *indio_dev, unsigned int vref_mv) { struct ad7793_state *st = iio_priv(indio_dev); - int i, ret = -1; + int i, ret; unsigned long long scale_uv; u32 id; @@ -266,7 +266,7 @@ static int ad7793_setup(struct iio_dev *indio_dev, return ret; /* reset the serial interface */ - ret = spi_write(st->sd.spi, (u8 *)&ret, sizeof(ret)); + ret = ad_sd_reset(&st->sd, 32); if (ret < 0) goto out; usleep_range(500, 2000); /* Wait for at least 500us */ diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index d10bd0c97233..22c4c17cd996 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -177,6 +177,34 @@ out: } EXPORT_SYMBOL_GPL(ad_sd_read_reg); +/** + * ad_sd_reset() - Reset the serial interface + * + * @sigma_delta: The sigma delta device + * @reset_length: Number of SCLKs with DIN = 1 + * + * Returns 0 on success, an error code otherwise. + **/ +int ad_sd_reset(struct ad_sigma_delta *sigma_delta, + unsigned int reset_length) +{ + uint8_t *buf; + unsigned int size; + int ret; + + size = DIV_ROUND_UP(reset_length, 8); + buf = kcalloc(size, sizeof(*buf), GFP_KERNEL); + if (!buf) + return -ENOMEM; + + memset(buf, 0xff, size); + ret = spi_write(sigma_delta->spi, buf, size); + kfree(buf); + + return ret; +} +EXPORT_SYMBOL_GPL(ad_sd_reset); + static int ad_sd_calibrate(struct ad_sigma_delta *sigma_delta, unsigned int mode, unsigned int channel) { diff --git a/drivers/iio/adc/axp288_adc.c b/drivers/iio/adc/axp288_adc.c index 7fd24949c0c1..64799ad7ebad 100644 --- a/drivers/iio/adc/axp288_adc.c +++ b/drivers/iio/adc/axp288_adc.c @@ -28,8 +28,6 @@ #include #define AXP288_ADC_EN_MASK 0xF1 -#define AXP288_ADC_TS_PIN_GPADC 0xF2 -#define AXP288_ADC_TS_PIN_ON 0xF3 enum axp288_adc_id { AXP288_ADC_TS, @@ -123,16 +121,6 @@ static int axp288_adc_read_channel(int *val, unsigned long address, return IIO_VAL_INT; } -static int axp288_adc_set_ts(struct regmap *regmap, unsigned int mode, - unsigned long address) -{ - /* channels other than GPADC do not need to switch TS pin */ - if (address != AXP288_GP_ADC_H) - return 0; - - return regmap_write(regmap, AXP288_ADC_TS_PIN_CTRL, mode); -} - static int axp288_adc_read_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int *val, int *val2, long mask) @@ -143,16 +131,7 @@ static int axp288_adc_read_raw(struct iio_dev *indio_dev, mutex_lock(&indio_dev->mlock); switch (mask) { case IIO_CHAN_INFO_RAW: - if (axp288_adc_set_ts(info->regmap, AXP288_ADC_TS_PIN_GPADC, - chan->address)) { - dev_err(&indio_dev->dev, "GPADC mode\n"); - ret = -EINVAL; - break; - } ret = axp288_adc_read_channel(val, chan->address, info->regmap); - if (axp288_adc_set_ts(info->regmap, AXP288_ADC_TS_PIN_ON, - chan->address)) - dev_err(&indio_dev->dev, "TS pin restore\n"); break; default: ret = -EINVAL; @@ -162,15 +141,6 @@ static int axp288_adc_read_raw(struct iio_dev *indio_dev, return ret; } -static int axp288_adc_set_state(struct regmap *regmap) -{ - /* ADC should be always enabled for internal FG to function */ - if (regmap_write(regmap, AXP288_ADC_TS_PIN_CTRL, AXP288_ADC_TS_PIN_ON)) - return -EIO; - - return regmap_write(regmap, AXP20X_ADC_EN1, AXP288_ADC_EN_MASK); -} - static const struct iio_info axp288_adc_iio_info = { .read_raw = &axp288_adc_read_raw, .driver_module = THIS_MODULE, @@ -199,7 +169,7 @@ static int axp288_adc_probe(struct platform_device *pdev) * Set ADC to enabled state at all time, including system suspend. * otherwise internal fuel gauge functionality may be affected. */ - ret = axp288_adc_set_state(axp20x->regmap); + ret = regmap_write(info->regmap, AXP20X_ADC_EN1, AXP288_ADC_EN_MASK); if (ret) { dev_err(&pdev->dev, "unable to enable ADC device\n"); return ret; diff --git a/drivers/iio/adc/fsl-imx25-gcq.c b/drivers/iio/adc/fsl-imx25-gcq.c index 72b32c1ab257..ea264fa9e567 100644 --- a/drivers/iio/adc/fsl-imx25-gcq.c +++ b/drivers/iio/adc/fsl-imx25-gcq.c @@ -401,6 +401,7 @@ static const struct of_device_id mx25_gcq_ids[] = { { .compatible = "fsl,imx25-gcq", }, { /* Sentinel */ } }; +MODULE_DEVICE_TABLE(of, mx25_gcq_ids); static struct platform_driver mx25_gcq_driver = { .driver = { diff --git a/drivers/iio/adc/mcp320x.c b/drivers/iio/adc/mcp320x.c index 634717ae12f3..071dd23a33d9 100644 --- a/drivers/iio/adc/mcp320x.c +++ b/drivers/iio/adc/mcp320x.c @@ -17,6 +17,8 @@ * MCP3204 * MCP3208 * ------------ + * 13 bit converter + * MCP3301 * * Datasheet can be found here: * http://ww1.microchip.com/downloads/en/DeviceDoc/21293C.pdf mcp3001 @@ -96,7 +98,7 @@ static int mcp320x_channel_to_tx_data(int device_index, } static int mcp320x_adc_conversion(struct mcp320x *adc, u8 channel, - bool differential, int device_index) + bool differential, int device_index, int *val) { int ret; @@ -117,19 +119,25 @@ static int mcp320x_adc_conversion(struct mcp320x *adc, u8 channel, switch (device_index) { case mcp3001: - return (adc->rx_buf[0] << 5 | adc->rx_buf[1] >> 3); + *val = (adc->rx_buf[0] << 5 | adc->rx_buf[1] >> 3); + return 0; case mcp3002: case mcp3004: case mcp3008: - return (adc->rx_buf[0] << 2 | adc->rx_buf[1] >> 6); + *val = (adc->rx_buf[0] << 2 | adc->rx_buf[1] >> 6); + return 0; case mcp3201: - return (adc->rx_buf[0] << 7 | adc->rx_buf[1] >> 1); + *val = (adc->rx_buf[0] << 7 | adc->rx_buf[1] >> 1); + return 0; case mcp3202: case mcp3204: case mcp3208: - return (adc->rx_buf[0] << 4 | adc->rx_buf[1] >> 4); + *val = (adc->rx_buf[0] << 4 | adc->rx_buf[1] >> 4); + return 0; case mcp3301: - return sign_extend32((adc->rx_buf[0] & 0x1f) << 8 | adc->rx_buf[1], 12); + *val = sign_extend32((adc->rx_buf[0] & 0x1f) << 8 + | adc->rx_buf[1], 12); + return 0; default: return -EINVAL; } @@ -150,12 +158,10 @@ static int mcp320x_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: ret = mcp320x_adc_conversion(adc, channel->address, - channel->differential, device_index); - + channel->differential, device_index, val); if (ret < 0) goto out; - *val = ret; ret = IIO_VAL_INT; break; @@ -312,6 +318,7 @@ static int mcp320x_probe(struct spi_device *spi) indio_dev->name = spi_get_device_id(spi)->name; indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->info = &mcp320x_info; + spi_set_drvdata(spi, indio_dev); chip_info = &mcp320x_chip_infos[spi_get_device_id(spi)->driver_data]; indio_dev->channels = chip_info->channels; diff --git a/drivers/iio/adc/twl4030-madc.c b/drivers/iio/adc/twl4030-madc.c index 0c74869a540a..7ffc5db4d7ee 100644 --- a/drivers/iio/adc/twl4030-madc.c +++ b/drivers/iio/adc/twl4030-madc.c @@ -866,8 +866,10 @@ static int twl4030_madc_probe(struct platform_device *pdev) /* Enable 3v1 bias regulator for MADC[3:6] */ madc->usb3v1 = devm_regulator_get(madc->dev, "vusb3v1"); - if (IS_ERR(madc->usb3v1)) - return -ENODEV; + if (IS_ERR(madc->usb3v1)) { + ret = -ENODEV; + goto err_i2c; + } ret = regulator_enable(madc->usb3v1); if (ret) @@ -876,11 +878,13 @@ static int twl4030_madc_probe(struct platform_device *pdev) ret = iio_device_register(iio_dev); if (ret) { dev_err(&pdev->dev, "could not register iio device\n"); - goto err_i2c; + goto err_usb3v1; } return 0; +err_usb3v1: + regulator_disable(madc->usb3v1); err_i2c: twl4030_madc_set_current_generator(madc, 0, 0); err_current_generator: diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index 0a6beb3d99cb..56cf5907a5f0 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -1208,7 +1208,7 @@ static int xadc_probe(struct platform_device *pdev) ret = xadc->ops->setup(pdev, indio_dev, irq); if (ret) - goto err_free_samplerate_trigger; + goto err_clk_disable_unprepare; ret = request_irq(irq, xadc->ops->interrupt_handler, 0, dev_name(&pdev->dev), indio_dev); @@ -1268,6 +1268,8 @@ static int xadc_probe(struct platform_device *pdev) err_free_irq: free_irq(irq, indio_dev); +err_clk_disable_unprepare: + clk_disable_unprepare(xadc->clk); err_free_samplerate_trigger: if (xadc->ops->flags & XADC_FLAGS_BUFFERED) iio_trigger_free(xadc->samplerate_trigger); @@ -1277,8 +1279,6 @@ err_free_convst_trigger: err_triggered_buffer_cleanup: if (xadc->ops->flags & XADC_FLAGS_BUFFERED) iio_triggered_buffer_cleanup(indio_dev); -err_clk_disable_unprepare: - clk_disable_unprepare(xadc->clk); err_device_free: kfree(indio_dev->channels); diff --git a/drivers/iio/dummy/iio_simple_dummy_events.c b/drivers/iio/dummy/iio_simple_dummy_events.c index ed63ffd849f8..7ec2a0bb0807 100644 --- a/drivers/iio/dummy/iio_simple_dummy_events.c +++ b/drivers/iio/dummy/iio_simple_dummy_events.c @@ -72,6 +72,7 @@ int iio_simple_dummy_write_event_config(struct iio_dev *indio_dev, st->event_en = state; else return -EINVAL; + break; default: return -EINVAL; } diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index fc340ed3dca1..c5bc73135436 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -306,8 +306,10 @@ static ssize_t iio_debugfs_read_reg(struct file *file, char __user *userbuf, ret = indio_dev->info->debugfs_reg_access(indio_dev, indio_dev->cached_reg_addr, 0, &val); - if (ret) + if (ret) { dev_err(indio_dev->dev.parent, "%s: read failed\n", __func__); + return ret; + } len = snprintf(buf, sizeof(buf), "0x%X\n", val); diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c index f762eb8b174a..19aa957bd454 100644 --- a/drivers/iio/pressure/bmp280-core.c +++ b/drivers/iio/pressure/bmp280-core.c @@ -558,7 +558,7 @@ static int bmp280_chip_config(struct bmp280_data *data) u8 osrs = BMP280_OSRS_TEMP_X(data->oversampling_temp + 1) | BMP280_OSRS_PRESS_X(data->oversampling_press + 1); - ret = regmap_update_bits(data->regmap, BMP280_REG_CTRL_MEAS, + ret = regmap_write_bits(data->regmap, BMP280_REG_CTRL_MEAS, BMP280_OSRS_TEMP_MASK | BMP280_OSRS_PRESS_MASK | BMP280_MODE_MASK, diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 63e82f8e8308..fb4ce0394ac7 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -446,15 +446,10 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, ret = ipv6_stub->ipv6_dst_lookup(addr->net, NULL, &dst, &fl6); if (ret < 0) - goto put; + return ret; rt = (struct rt6_info *)dst; - if (ipv6_addr_any(&fl6.saddr)) { - ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev, - &fl6.daddr, 0, &fl6.saddr); - if (ret) - goto put; - + if (ipv6_addr_any(&src_in->sin6_addr)) { src_in->sin6_family = AF_INET6; src_in->sin6_addr = fl6.saddr; } @@ -471,9 +466,6 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, *pdst = dst; return 0; -put: - dst_release(dst); - return ret; } #else static int addr6_resolve(struct sockaddr_in6 *src_in, diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 9398143d7c5e..6512a555f7f8 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2577,9 +2577,9 @@ fail: c4iw_put_ep(&child_ep->com); reject: reject_cr(dev, hwtid, skb); +out: if (parent_ep) c4iw_put_ep(&parent_ep->com); -out: return 0; } @@ -3441,7 +3441,7 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) cm_id->provider_data = ep; goto out; } - + remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid); cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, ep->com.local_addr.ss_family); fail2: diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 34cfd341b6d6..a3dd27b1305d 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -297,14 +297,15 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, * The resulting value will be rounded down to the closest * multiple of dd->rcv_entries.group_size. */ - rcd->egrbufs.buffers = kcalloc(rcd->egrbufs.count, - sizeof(*rcd->egrbufs.buffers), - GFP_KERNEL); + rcd->egrbufs.buffers = kzalloc_node( + rcd->egrbufs.count * sizeof(*rcd->egrbufs.buffers), + GFP_KERNEL, numa); if (!rcd->egrbufs.buffers) goto bail; - rcd->egrbufs.rcvtids = kcalloc(rcd->egrbufs.count, - sizeof(*rcd->egrbufs.rcvtids), - GFP_KERNEL); + rcd->egrbufs.rcvtids = kzalloc_node( + rcd->egrbufs.count * + sizeof(*rcd->egrbufs.rcvtids), + GFP_KERNEL, numa); if (!rcd->egrbufs.rcvtids) goto bail; rcd->egrbufs.size = eager_buffer_size; @@ -322,8 +323,8 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, rcd->egrbufs.rcvtid_size = HFI1_MAX_EAGER_BUFFER_SIZE; if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */ - rcd->opstats = kzalloc(sizeof(*rcd->opstats), - GFP_KERNEL); + rcd->opstats = kzalloc_node(sizeof(*rcd->opstats), + GFP_KERNEL, numa); if (!rcd->opstats) goto bail; } diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 4ac8f330c5cb..335613a1a46a 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -673,12 +673,12 @@ MODULE_PARM_DESC(pcie_retry, "Driver will try this many times to reach requested #define UNSET_PSET 255 #define DEFAULT_DISCRETE_PSET 2 /* discrete HFI */ -#define DEFAULT_MCP_PSET 4 /* MCP HFI */ +#define DEFAULT_MCP_PSET 6 /* MCP HFI */ static uint pcie_pset = UNSET_PSET; module_param(pcie_pset, uint, S_IRUGO); MODULE_PARM_DESC(pcie_pset, "PCIe Eq Pset value to use, range is 0-10"); -static uint pcie_ctle = 1; /* discrete on, integrated off */ +static uint pcie_ctle = 3; /* discrete on, integrated on */ module_param(pcie_ctle, uint, S_IRUGO); MODULE_PARM_DESC(pcie_ctle, "PCIe static CTLE mode, bit 0 - discrete on/off, bit 1 - integrated on/off"); diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 4bd5b5caa243..613074e963bb 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -551,7 +551,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) case IB_WR_RDMA_WRITE: if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; - /* FALLTHROUGH */ + goto no_flow_control; case IB_WR_RDMA_WRITE_WITH_IMM: /* If no credit, return. */ if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && @@ -559,6 +559,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; goto bail; } +no_flow_control: put_ib_reth_vaddr( wqe->rdma_wr.remote_addr, &ohdr->u.rc.reth); diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index ded27172320e..cedb447d0162 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -7080,7 +7080,7 @@ static void qib_7322_txchk_change(struct qib_devdata *dd, u32 start, unsigned long flags; while (wait) { - unsigned long shadow; + unsigned long shadow = 0; int cstart, previ = -1; /* diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index f3fe787c9426..c1523f9a3c12 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -357,7 +357,7 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags) case IB_WR_RDMA_WRITE: if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; - /* FALLTHROUGH */ + goto no_flow_control; case IB_WR_RDMA_WRITE_WITH_IMM: /* If no credit, return. */ if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && @@ -365,7 +365,7 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags) qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; goto bail; } - +no_flow_control: ohdr->u.rc.reth.vaddr = cpu_to_be64(wqe->rdma_wr.remote_addr); ohdr->u.rc.reth.rkey = diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 6bac0717c540..ee26a1b1b4ed 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -274,6 +274,7 @@ static u32 alloc_index(struct rxe_pool *pool) if (index >= range) index = find_first_zero_bit(pool->table, range); + WARN_ON_ONCE(index >= range); set_bit(index, pool->table); pool->last = index; return index + pool->min_index; diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index ccf624763565..69ed4e0d7a0d 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -418,7 +418,7 @@ static enum resp_states check_length(struct rxe_qp *qp, static enum resp_states check_rkey(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { - struct rxe_mem *mem; + struct rxe_mem *mem = NULL; u64 va; u32 rkey; u32 resid; @@ -452,38 +452,38 @@ static enum resp_states check_rkey(struct rxe_qp *qp, mem = lookup_mem(qp->pd, access, rkey, lookup_remote); if (!mem) { state = RESPST_ERR_RKEY_VIOLATION; - goto err1; + goto err; } if (unlikely(mem->state == RXE_MEM_STATE_FREE)) { state = RESPST_ERR_RKEY_VIOLATION; - goto err1; + goto err; } if (mem_check_range(mem, va, resid)) { state = RESPST_ERR_RKEY_VIOLATION; - goto err2; + goto err; } if (pkt->mask & RXE_WRITE_MASK) { if (resid > mtu) { if (pktlen != mtu || bth_pad(pkt)) { state = RESPST_ERR_LENGTH; - goto err2; + goto err; } qp->resp.resid = mtu; } else { if (pktlen != resid) { state = RESPST_ERR_LENGTH; - goto err2; + goto err; } if ((bth_pad(pkt) != (0x3 & (-resid)))) { /* This case may not be exactly that * but nothing else fits. */ state = RESPST_ERR_LENGTH; - goto err2; + goto err; } } } @@ -493,9 +493,9 @@ static enum resp_states check_rkey(struct rxe_qp *qp, qp->resp.mr = mem; return RESPST_EXECUTE; -err2: - rxe_drop_ref(mem); -err1: +err: + if (mem) + rxe_drop_ref(mem); return state; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 08c4b0287304..183db0cd849e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1302,7 +1302,7 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) rcu_dereference_protected(neigh->hnext, lockdep_is_held(&priv->lock))); /* remove from path/mc list */ - list_del(&neigh->list); + list_del_init(&neigh->list); call_rcu(&neigh->rcu, ipoib_neigh_reclaim); } else { np = &neigh->hnext; @@ -1466,7 +1466,7 @@ void ipoib_neigh_free(struct ipoib_neigh *neigh) rcu_dereference_protected(neigh->hnext, lockdep_is_held(&priv->lock))); /* remove from parent list */ - list_del(&neigh->list); + list_del_init(&neigh->list); call_rcu(&neigh->rcu, ipoib_neigh_reclaim); return; } else { @@ -1551,7 +1551,7 @@ void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid) rcu_dereference_protected(neigh->hnext, lockdep_is_held(&priv->lock))); /* remove from parent list */ - list_del(&neigh->list); + list_del_init(&neigh->list); call_rcu(&neigh->rcu, ipoib_neigh_reclaim); } else { np = &neigh->hnext; @@ -1593,7 +1593,7 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv) rcu_dereference_protected(neigh->hnext, lockdep_is_held(&priv->lock))); /* remove from path/mc list */ - list_del(&neigh->list); + list_del_init(&neigh->list); call_rcu(&neigh->rcu, ipoib_neigh_reclaim); } } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 57eadd2b7a71..93b50be14438 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -165,11 +165,11 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) out: up_write(&ppriv->vlan_rwsem); + rtnl_unlock(); + if (result) free_netdev(priv->dev); - rtnl_unlock(); - return result; } @@ -193,7 +193,6 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) { if (priv->pkey == pkey && priv->child_type == IPOIB_LEGACY_CHILD) { - unregister_netdevice(priv->dev); list_del(&priv->list); dev = priv->dev; break; @@ -201,6 +200,11 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) } up_write(&ppriv->vlan_rwsem); + if (dev) { + ipoib_dbg(ppriv, "delete child vlan %s\n", dev->name); + unregister_netdevice(dev); + } + rtnl_unlock(); if (dev) { diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 5be14ad29d46..dbf09836ff30 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -904,6 +904,13 @@ static const struct dmi_system_id __initconst i8042_dmi_kbdreset_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "P34"), }, }, + { + /* Gigabyte P57 - Elantech touchpad */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"), + DMI_MATCH(DMI_PRODUCT_NAME, "P57"), + }, + }, { /* Schenker XMG C504 - Elantech touchpad */ .matches = { diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index c380b7e8f1c6..1a0b110f12c0 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3120,6 +3120,7 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, mutex_unlock(&domain->api_lock); domain_flush_tlb_pde(domain); + domain_flush_complete(domain); return unmap_size; } diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 8f7281444551..5a9a4416f467 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1211,7 +1211,7 @@ static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain, continue; s2cr[idx].type = type; - s2cr[idx].privcfg = S2CR_PRIVCFG_UNPRIV; + s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT; s2cr[idx].cbndx = cbndx; arm_smmu_write_s2cr(smmu, idx); } diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 30808e91b775..c7820b3ea80e 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -542,7 +542,10 @@ static void sysmmu_tlb_invalidate_flpdcache(struct sysmmu_drvdata *data, spin_lock_irqsave(&data->lock, flags); if (is_sysmmu_active(data) && data->version >= MAKE_MMU_VER(3, 3)) { clk_enable(data->clk_master); - __sysmmu_tlb_invalidate_entry(data, iova, 1); + if (sysmmu_block(data)) { + __sysmmu_tlb_invalidate_entry(data, iova, 1); + sysmmu_unblock(data); + } clk_disable(data->clk_master); } spin_unlock_irqrestore(&data->lock, flags); diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index f5c90e1366ce..7c9d632f1d09 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -335,8 +335,12 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) pte |= ARM_LPAE_PTE_NSTABLE; __arm_lpae_set_pte(ptep, pte, cfg); - } else { + } else if (!iopte_leaf(pte, lvl)) { cptep = iopte_deref(pte, data); + } else { + /* We require an unmap first */ + WARN_ON(!selftest_running); + return -EEXIST; } /* Rinse, repeat */ diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c index 1eef56a89b1f..05bbf171df37 100644 --- a/drivers/irqchip/irq-crossbar.c +++ b/drivers/irqchip/irq-crossbar.c @@ -198,7 +198,8 @@ static const struct irq_domain_ops crossbar_domain_ops = { static int __init crossbar_of_init(struct device_node *node) { - int i, size, max = 0, reserved = 0, entry; + int i, size, reserved = 0; + u32 max = 0, entry; const __be32 *irqsr; int ret = -ENOMEM; diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index bf3fbd00a091..64b586458d3d 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -828,7 +828,6 @@ isdn_ppp_write(int min, struct file *file, const char __user *buf, int count) isdn_net_local *lp; struct ippp_struct *is; int proto; - unsigned char protobuf[4]; is = file->private_data; @@ -842,24 +841,28 @@ isdn_ppp_write(int min, struct file *file, const char __user *buf, int count) if (!lp) printk(KERN_DEBUG "isdn_ppp_write: lp == NULL\n"); else { - /* - * Don't reset huptimer for - * LCP packets. (Echo requests). - */ - if (copy_from_user(protobuf, buf, 4)) - return -EFAULT; - proto = PPP_PROTOCOL(protobuf); - if (proto != PPP_LCP) - lp->huptimer = 0; + if (lp->isdn_device < 0 || lp->isdn_channel < 0) { + unsigned char protobuf[4]; + /* + * Don't reset huptimer for + * LCP packets. (Echo requests). + */ + if (copy_from_user(protobuf, buf, 4)) + return -EFAULT; + + proto = PPP_PROTOCOL(protobuf); + if (proto != PPP_LCP) + lp->huptimer = 0; - if (lp->isdn_device < 0 || lp->isdn_channel < 0) return 0; + } if ((dev->drv[lp->isdn_device]->flags & DRV_FLAG_RUNNING) && lp->dialstate == 0 && (lp->flags & ISDN_NET_CONNECTED)) { unsigned short hl; struct sk_buff *skb; + unsigned char *cpy_buf; /* * we need to reserve enough space in front of * sk_buff. old call to dev_alloc_skb only reserved @@ -872,11 +875,21 @@ isdn_ppp_write(int min, struct file *file, const char __user *buf, int count) return count; } skb_reserve(skb, hl); - if (copy_from_user(skb_put(skb, count), buf, count)) + cpy_buf = skb_put(skb, count); + if (copy_from_user(cpy_buf, buf, count)) { kfree_skb(skb); return -EFAULT; } + + /* + * Don't reset huptimer for + * LCP packets. (Echo requests). + */ + proto = PPP_PROTOCOL(cpy_buf); + if (proto != PPP_LCP) + lp->huptimer = 0; + if (is->debug & 0x40) { printk(KERN_DEBUG "ppp xmit: len %d\n", (int) skb->len); isdn_ppp_frame_log("xmit", skb->data, skb->len, 32, is->unit, lp->ppp_slot); diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index c3ea03c9a1a8..02619cabda8b 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -333,6 +333,7 @@ struct cached_dev { /* Limit number of writeback bios in flight */ struct semaphore in_flight; struct task_struct *writeback_thread; + struct workqueue_struct *writeback_write_wq; struct keybuf writeback_keys; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index a37c1776f2e3..e0f1c6d534fe 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -196,12 +196,12 @@ static void bch_data_insert_start(struct closure *cl) struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); struct bio *bio = op->bio, *n; - if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) - wake_up_gc(op->c); - if (op->bypass) return bch_data_invalidate(cl); + if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) + wake_up_gc(op->c); + /* * Journal writes are marked REQ_PREFLUSH; if the original write was a * flush, it'll wait on the journal write. diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 66669c8f4161..f4557f558b24 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1025,7 +1025,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) } if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { - bch_sectors_dirty_init(dc); + bch_sectors_dirty_init(&dc->disk); atomic_set(&dc->has_dirty, 1); atomic_inc(&dc->count); bch_writeback_queue(dc); @@ -1058,6 +1058,8 @@ static void cached_dev_free(struct closure *cl) cancel_delayed_work_sync(&dc->writeback_rate_update); if (!IS_ERR_OR_NULL(dc->writeback_thread)) kthread_stop(dc->writeback_thread); + if (dc->writeback_write_wq) + destroy_workqueue(dc->writeback_write_wq); mutex_lock(&bch_register_lock); @@ -1229,6 +1231,7 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) goto err; bcache_device_attach(d, c, u - c->uuids); + bch_sectors_dirty_init(d); bch_flash_dev_request_init(d); add_disk(d->disk); @@ -1967,6 +1970,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, else err = "device busy"; mutex_unlock(&bch_register_lock); + if (!IS_ERR(bdev)) + bdput(bdev); if (attr == &ksysfs_register_quiet) goto out; } diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index b3ff57d61dde..4fbb5532f24c 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -191,7 +191,7 @@ STORE(__cached_dev) { struct cached_dev *dc = container_of(kobj, struct cached_dev, disk.kobj); - unsigned v = size; + ssize_t v = size; struct cache_set *c; struct kobj_uevent_env *env; @@ -226,7 +226,7 @@ STORE(__cached_dev) bch_cached_dev_run(dc); if (attr == &sysfs_cache_mode) { - ssize_t v = bch_read_string_list(buf, bch_cache_modes + 1); + v = bch_read_string_list(buf, bch_cache_modes + 1); if (v < 0) return v; diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index dde6172f3f10..eb70f6894f05 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -73,24 +73,44 @@ STRTO_H(strtouint, unsigned int) STRTO_H(strtoll, long long) STRTO_H(strtoull, unsigned long long) +/** + * bch_hprint() - formats @v to human readable string for sysfs. + * + * @v - signed 64 bit integer + * @buf - the (at least 8 byte) buffer to format the result into. + * + * Returns the number of bytes used by format. + */ ssize_t bch_hprint(char *buf, int64_t v) { static const char units[] = "?kMGTPEZY"; - char dec[4] = ""; - int u, t = 0; + int u = 0, t; - for (u = 0; v >= 1024 || v <= -1024; u++) { - t = v & ~(~0 << 10); - v >>= 10; - } + uint64_t q; - if (!u) - return sprintf(buf, "%llu", v); + if (v < 0) + q = -v; + else + q = v; - if (v < 100 && v > -100) - snprintf(dec, sizeof(dec), ".%i", t / 100); + /* For as long as the number is more than 3 digits, but at least + * once, shift right / divide by 1024. Keep the remainder for + * a digit after the decimal point. + */ + do { + u++; - return sprintf(buf, "%lli%s%c", v, dec, units[u]); + t = q & ~(~0 << 10); + q >>= 10; + } while (q >= 1000); + + if (v < 0) + /* '-', up to 3 digits, '.', 1 digit, 1 character, null; + * yields 8 bytes. + */ + return sprintf(buf, "-%llu.%i%c", q, t * 10 / 1024, units[u]); + else + return sprintf(buf, "%llu.%i%c", q, t * 10 / 1024, units[u]); } ssize_t bch_snprint_string_list(char *buf, size_t size, const char * const list[], diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index e51644e503a5..4ce2b19fe120 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -20,7 +20,8 @@ static void __update_writeback_rate(struct cached_dev *dc) { struct cache_set *c = dc->disk.c; - uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size; + uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size - + bcache_flash_devs_sectors_dirty(c); uint64_t cache_dirty_target = div_u64(cache_sectors * dc->writeback_percent, 100); @@ -186,7 +187,7 @@ static void write_dirty(struct closure *cl) closure_bio_submit(&io->bio, cl); - continue_at(cl, write_dirty_finish, system_wq); + continue_at(cl, write_dirty_finish, io->dc->writeback_write_wq); } static void read_dirty_endio(struct bio *bio) @@ -206,7 +207,7 @@ static void read_dirty_submit(struct closure *cl) closure_bio_submit(&io->bio, cl); - continue_at(cl, write_dirty, system_wq); + continue_at(cl, write_dirty, io->dc->writeback_write_wq); } static void read_dirty(struct cached_dev *dc) @@ -482,17 +483,17 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b, return MAP_CONTINUE; } -void bch_sectors_dirty_init(struct cached_dev *dc) +void bch_sectors_dirty_init(struct bcache_device *d) { struct sectors_dirty_init op; bch_btree_op_init(&op.op, -1); - op.inode = dc->disk.id; + op.inode = d->id; - bch_btree_map_keys(&op.op, dc->disk.c, &KEY(op.inode, 0, 0), + bch_btree_map_keys(&op.op, d->c, &KEY(op.inode, 0, 0), sectors_dirty_init_fn, 0); - dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk); + d->sectors_dirty_last = bcache_dev_sectors_dirty(d); } void bch_cached_dev_writeback_init(struct cached_dev *dc) @@ -516,6 +517,11 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc) int bch_cached_dev_writeback_start(struct cached_dev *dc) { + dc->writeback_write_wq = alloc_workqueue("bcache_writeback_wq", + WQ_MEM_RECLAIM, 0); + if (!dc->writeback_write_wq) + return -ENOMEM; + dc->writeback_thread = kthread_create(bch_writeback_thread, dc, "bcache_writeback"); if (IS_ERR(dc->writeback_thread)) diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 301eaf565167..cdf8d253209e 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -14,6 +14,25 @@ static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d) return ret; } +static inline uint64_t bcache_flash_devs_sectors_dirty(struct cache_set *c) +{ + uint64_t i, ret = 0; + + mutex_lock(&bch_register_lock); + + for (i = 0; i < c->nr_uuids; i++) { + struct bcache_device *d = c->devices[i]; + + if (!d || !UUID_FLASH_ONLY(&c->uuids[i])) + continue; + ret += bcache_dev_sectors_dirty(d); + } + + mutex_unlock(&bch_register_lock); + + return ret; +} + static inline unsigned offset_to_stripe(struct bcache_device *d, uint64_t offset) { @@ -85,7 +104,7 @@ static inline void bch_writeback_add(struct cached_dev *dc) void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int); -void bch_sectors_dirty_init(struct cached_dev *dc); +void bch_sectors_dirty_init(struct bcache_device *); void bch_cached_dev_writeback_init(struct cached_dev *); int bch_cached_dev_writeback_start(struct cached_dev *); diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 2d826927a3bf..fb02c3979bf4 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1992,6 +1992,11 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, long pages; struct bitmap_page *new_bp; + if (bitmap->storage.file && !init) { + pr_info("md: cannot resize file-based bitmap\n"); + return -EINVAL; + } + if (chunksize == 0) { /* If there is enough space, leave the chunk size unchanged, * else increase by factor of two until there is enough space. diff --git a/drivers/md/linear.c b/drivers/md/linear.c index b0c0aef92a37..12abf69d568a 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -223,7 +223,8 @@ static int linear_add(struct mddev *mddev, struct md_rdev *rdev) * oldconf until no one uses it anymore. */ mddev_suspend(mddev); - oldconf = rcu_dereference(mddev->private); + oldconf = rcu_dereference_protected(mddev->private, + lockdep_is_held(&mddev->reconfig_mutex)); mddev->raid_disks++; WARN_ONCE(mddev->raid_disks != newconf->raid_disks, "copied raid_disks doesn't match mddev->raid_disks"); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 4c4aab02e311..b19b551bb34b 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1407,11 +1407,24 @@ retry_write: mbio->bi_private = r10_bio; atomic_inc(&r10_bio->remaining); + + cb = blk_check_plugged(raid10_unplug, mddev, + sizeof(*plug)); + if (cb) + plug = container_of(cb, struct raid10_plug_cb, + cb); + else + plug = NULL; spin_lock_irqsave(&conf->device_lock, flags); - bio_list_add(&conf->pending_bio_list, mbio); - conf->pending_count++; + if (plug) { + bio_list_add(&plug->pending, mbio); + plug->pending_cnt++; + } else { + bio_list_add(&conf->pending_bio_list, mbio); + conf->pending_count++; + } spin_unlock_irqrestore(&conf->device_lock, flags); - if (!mddev_check_plugged(mddev)) + if (!plug) md_wakeup_thread(mddev->thread); } } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 549b4afd12e1..7aea0221530c 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -829,6 +829,14 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh spin_unlock(&head->batch_head->batch_lock); goto unlock_out; } + /* + * We must assign batch_head of this stripe within the + * batch_lock, otherwise clear_batch_ready of batch head + * stripe could clear BATCH_READY bit of this stripe and + * this stripe->batch_head doesn't get assigned, which + * could confuse clear_batch_ready for this stripe + */ + sh->batch_head = head->batch_head; /* * at this point, head's BATCH_READY could be cleared, but we @@ -836,8 +844,6 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh */ list_add(&sh->batch_list, &head->batch_list); spin_unlock(&head->batch_head->batch_lock); - - sh->batch_head = head->batch_head; } else { head->batch_head = head; sh->batch_head = head->batch_head; @@ -4277,7 +4283,8 @@ static void break_stripe_batch_list(struct stripe_head *head_sh, set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS | (1 << STRIPE_PREREAD_ACTIVE) | - (1 << STRIPE_DEGRADED)), + (1 << STRIPE_DEGRADED) | + (1 << STRIPE_ON_UNPLUG_LIST)), head_sh->state & (1 << STRIPE_INSYNC)); sh->check_state = head_sh->check_state; diff --git a/drivers/media/pci/ttpci/av7110_hw.c b/drivers/media/pci/ttpci/av7110_hw.c index 0583d56ef5ef..41ba8489db34 100644 --- a/drivers/media/pci/ttpci/av7110_hw.c +++ b/drivers/media/pci/ttpci/av7110_hw.c @@ -56,11 +56,11 @@ by Nathan Laredo */ int av7110_debiwrite(struct av7110 *av7110, u32 config, - int addr, u32 val, int count) + int addr, u32 val, unsigned int count) { struct saa7146_dev *dev = av7110->dev; - if (count <= 0 || count > 32764) { + if (count > 32764) { printk("%s: invalid count %d\n", __func__, count); return -1; } @@ -78,12 +78,12 @@ int av7110_debiwrite(struct av7110 *av7110, u32 config, return 0; } -u32 av7110_debiread(struct av7110 *av7110, u32 config, int addr, int count) +u32 av7110_debiread(struct av7110 *av7110, u32 config, int addr, unsigned int count) { struct saa7146_dev *dev = av7110->dev; u32 result = 0; - if (count > 32764 || count <= 0) { + if (count > 32764) { printk("%s: invalid count %d\n", __func__, count); return 0; } diff --git a/drivers/media/pci/ttpci/av7110_hw.h b/drivers/media/pci/ttpci/av7110_hw.h index 1634aba5cb84..ccb148059406 100644 --- a/drivers/media/pci/ttpci/av7110_hw.h +++ b/drivers/media/pci/ttpci/av7110_hw.h @@ -377,14 +377,14 @@ extern int av7110_fw_request(struct av7110 *av7110, u16 *request_buf, /* DEBI (saa7146 data extension bus interface) access */ extern int av7110_debiwrite(struct av7110 *av7110, u32 config, - int addr, u32 val, int count); + int addr, u32 val, unsigned int count); extern u32 av7110_debiread(struct av7110 *av7110, u32 config, - int addr, int count); + int addr, unsigned int count); /* DEBI during interrupt */ /* single word writes */ -static inline void iwdebi(struct av7110 *av7110, u32 config, int addr, u32 val, int count) +static inline void iwdebi(struct av7110 *av7110, u32 config, int addr, u32 val, unsigned int count) { av7110_debiwrite(av7110, config, addr, val, count); } @@ -397,7 +397,7 @@ static inline void mwdebi(struct av7110 *av7110, u32 config, int addr, av7110_debiwrite(av7110, config, addr, 0, count); } -static inline u32 irdebi(struct av7110 *av7110, u32 config, int addr, u32 val, int count) +static inline u32 irdebi(struct av7110 *av7110, u32 config, int addr, u32 val, unsigned int count) { u32 res; @@ -408,7 +408,7 @@ static inline u32 irdebi(struct av7110 *av7110, u32 config, int addr, u32 val, i } /* DEBI outside interrupts, only for count <= 4! */ -static inline void wdebi(struct av7110 *av7110, u32 config, int addr, u32 val, int count) +static inline void wdebi(struct av7110 *av7110, u32 config, int addr, u32 val, unsigned int count) { unsigned long flags; @@ -417,7 +417,7 @@ static inline void wdebi(struct av7110 *av7110, u32 config, int addr, u32 val, i spin_unlock_irqrestore(&av7110->debilock, flags); } -static inline u32 rdebi(struct av7110 *av7110, u32 config, int addr, u32 val, int count) +static inline u32 rdebi(struct av7110 *av7110, u32 config, int addr, u32 val, unsigned int count) { unsigned long flags; u32 res; diff --git a/drivers/media/platform/exynos-gsc/gsc-core.c b/drivers/media/platform/exynos-gsc/gsc-core.c index 787bd16c19e5..bbb5feef8308 100644 --- a/drivers/media/platform/exynos-gsc/gsc-core.c +++ b/drivers/media/platform/exynos-gsc/gsc-core.c @@ -849,9 +849,7 @@ int gsc_prepare_addr(struct gsc_ctx *ctx, struct vb2_buffer *vb, if ((frame->fmt->pixelformat == V4L2_PIX_FMT_VYUY) || (frame->fmt->pixelformat == V4L2_PIX_FMT_YVYU) || - (frame->fmt->pixelformat == V4L2_PIX_FMT_NV61) || (frame->fmt->pixelformat == V4L2_PIX_FMT_YVU420) || - (frame->fmt->pixelformat == V4L2_PIX_FMT_NV21) || (frame->fmt->pixelformat == V4L2_PIX_FMT_YVU420M)) swap(addr->cb, addr->cr); diff --git a/drivers/media/usb/uvc/uvc_ctrl.c b/drivers/media/usb/uvc/uvc_ctrl.c index c2ee6e39fd0c..20397aba6849 100644 --- a/drivers/media/usb/uvc/uvc_ctrl.c +++ b/drivers/media/usb/uvc/uvc_ctrl.c @@ -2002,6 +2002,13 @@ int uvc_ctrl_add_mapping(struct uvc_video_chain *chain, goto done; } + /* Validate the user-provided bit-size and offset */ + if (mapping->size > 32 || + mapping->offset + mapping->size > ctrl->info.size * 8) { + ret = -EINVAL; + goto done; + } + list_for_each_entry(map, &ctrl->info.mappings, list) { if (mapping->id == map->id) { uvc_trace(UVC_TRACE_CONTROL, "Can't add mapping '%s', " diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index bacecbd68a6d..dc51dd86377d 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -773,7 +773,8 @@ static int put_v4l2_event32(struct v4l2_event *kp, struct v4l2_event32 __user *u copy_to_user(&up->u, &kp->u, sizeof(kp->u)) || put_user(kp->pending, &up->pending) || put_user(kp->sequence, &up->sequence) || - compat_put_timespec(&kp->timestamp, &up->timestamp) || + put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || + put_user(kp->timestamp.tv_nsec, &up->timestamp.tv_nsec) || put_user(kp->id, &up->id) || copy_to_user(up->reserved, kp->reserved, 8 * sizeof(__u32))) return -EFAULT; diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 2e5233b60971..ae856161faa9 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -244,6 +244,10 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed, ctx->real_mode = false; } + /* + * Increment driver use count. Enables global TLBIs for hash + * and callbacks to handle the segment table + */ cxl_ctx_get(); if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) { diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c index afa211397048..d3e009438991 100644 --- a/drivers/misc/cxl/file.c +++ b/drivers/misc/cxl/file.c @@ -91,7 +91,6 @@ static int __afu_open(struct inode *inode, struct file *file, bool master) pr_devel("afu_open pe: %i\n", ctx->pe); file->private_data = ctx; - cxl_ctx_get(); /* indicate success */ rc = 0; @@ -213,6 +212,12 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, ctx->glpid = get_task_pid(current->group_leader, PIDTYPE_PID); + /* + * Increment driver use count. Enables global TLBIs for hash + * and callbacks to handle the segment table + */ + cxl_ctx_get(); + trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr); if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor, @@ -222,6 +227,7 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, put_pid(ctx->glpid); put_pid(ctx->pid); ctx->glpid = ctx->pid = NULL; + cxl_ctx_put(); goto out; } diff --git a/drivers/misc/lkdtm_core.c b/drivers/misc/lkdtm_core.c index f9154b8d67f6..b2989f2d3126 100644 --- a/drivers/misc/lkdtm_core.c +++ b/drivers/misc/lkdtm_core.c @@ -533,7 +533,9 @@ static void __exit lkdtm_module_exit(void) /* Handle test-specific clean-up. */ lkdtm_usercopy_exit(); - unregister_jprobe(lkdtm_jprobe); + if (lkdtm_jprobe != NULL) + unregister_jprobe(lkdtm_jprobe); + pr_info("Crash point unregistered\n"); } diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index b592ca491101..e0218b3808d7 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1269,6 +1269,23 @@ out_err: return err; } +static void mmc_select_driver_type(struct mmc_card *card) +{ + int card_drv_type, drive_strength, drv_type; + + card_drv_type = card->ext_csd.raw_driver_strength | + mmc_driver_type_mask(0); + + drive_strength = mmc_select_drive_strength(card, + card->ext_csd.hs200_max_dtr, + card_drv_type, &drv_type); + + card->drive_strength = drive_strength; + + if (drv_type) + mmc_set_driver_type(card->host, drv_type); +} + static int mmc_select_hs400es(struct mmc_card *card) { struct mmc_host *host = card->host; @@ -1317,6 +1334,8 @@ static int mmc_select_hs400es(struct mmc_card *card) goto out_err; } + mmc_select_driver_type(card); + /* Switch card to HS400 */ val = EXT_CSD_TIMING_HS400 | card->drive_strength << EXT_CSD_DRV_STR_SHIFT; @@ -1350,23 +1369,6 @@ out_err: return err; } -static void mmc_select_driver_type(struct mmc_card *card) -{ - int card_drv_type, drive_strength, drv_type; - - card_drv_type = card->ext_csd.raw_driver_strength | - mmc_driver_type_mask(0); - - drive_strength = mmc_select_drive_strength(card, - card->ext_csd.hs200_max_dtr, - card_drv_type, &drv_type); - - card->drive_strength = drive_strength; - - if (drv_type) - mmc_set_driver_type(card->host, drv_type); -} - /* * For device supporting HS200 mode, the following sequence * should be done before executing the tuning process. diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c index 1499d5333c79..528524a22c80 100644 --- a/drivers/mmc/core/sdio_bus.c +++ b/drivers/mmc/core/sdio_bus.c @@ -277,7 +277,7 @@ static void sdio_release_func(struct device *dev) sdio_free_func_cis(func); kfree(func->info); - + kfree(func->tmpbuf); kfree(func); } @@ -292,6 +292,16 @@ struct sdio_func *sdio_alloc_func(struct mmc_card *card) if (!func) return ERR_PTR(-ENOMEM); + /* + * allocate buffer separately to make sure it's properly aligned for + * DMA usage (incl. 64 bit DMA) + */ + func->tmpbuf = kmalloc(4, GFP_KERNEL); + if (!func->tmpbuf) { + kfree(func); + return ERR_PTR(-ENOMEM); + } + func->card = card; device_initialize(&func->dev); diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index be928ce62d32..9fdb0f0bfa06 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -333,7 +333,7 @@ static void esd_usb2_rx_can_msg(struct esd_usb2_net_priv *priv, } cf->can_id = id & ESD_IDMASK; - cf->can_dlc = get_can_dlc(msg->msg.rx.dlc); + cf->can_dlc = get_can_dlc(msg->msg.rx.dlc & ~ESD_RTR); if (id & ESD_EXTID) cf->can_id |= CAN_EFF_FLAG; diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 05369dc9dd09..eea9aea14b00 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -375,6 +375,8 @@ static void gs_usb_receive_bulk_callback(struct urb *urb) gs_free_tx_context(txc); + atomic_dec(&dev->active_tx_urbs); + netif_wake_queue(netdev); } @@ -463,14 +465,6 @@ static void gs_usb_xmit_callback(struct urb *urb) urb->transfer_buffer_length, urb->transfer_buffer, urb->transfer_dma); - - atomic_dec(&dev->active_tx_urbs); - - if (!netif_device_present(netdev)) - return; - - if (netif_queue_stopped(netdev)) - netif_wake_queue(netdev); } static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb, diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 3ec573c13dac..c26debc531ee 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -326,6 +326,7 @@ static void b53_get_vlan_entry(struct b53_device *dev, u16 vid, static void b53_set_forwarding(struct b53_device *dev, int enable) { + struct dsa_switch *ds = dev->ds; u8 mgmt; b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, &mgmt); @@ -336,6 +337,15 @@ static void b53_set_forwarding(struct b53_device *dev, int enable) mgmt &= ~SM_SW_FWD_EN; b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, mgmt); + + /* Include IMP port in dumb forwarding mode when no tagging protocol is + * set + */ + if (ds->ops->get_tag_protocol(ds) == DSA_TAG_PROTO_NONE) { + b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_CTRL, &mgmt); + mgmt |= B53_MII_DUMB_FWDG_EN; + b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_CTRL, mgmt); + } } static void b53_enable_vlan(struct b53_device *dev, bool enable) diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index dac0af4e2cd0..81044000ce75 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -104,6 +104,10 @@ #define B53_UC_FWD_EN BIT(6) #define B53_MC_FWD_EN BIT(7) +/* Switch control (8 bit) */ +#define B53_SWITCH_CTRL 0x22 +#define B53_MII_DUMB_FWDG_EN BIT(6) + /* (16 bit) */ #define B53_UC_FLOOD_MASK 0x32 #define B53_MC_FLOOD_MASK 0x34 diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c index aaf6fec566b5..3660a3d51731 100644 --- a/drivers/net/ethernet/ibm/emac/mal.c +++ b/drivers/net/ethernet/ibm/emac/mal.c @@ -402,7 +402,7 @@ static int mal_poll(struct napi_struct *napi, int budget) unsigned long flags; MAL_DBG2(mal, "poll(%d)" NL, budget); - again: + /* Process TX skbs */ list_for_each(l, &mal->poll_list) { struct mal_commac *mc = @@ -451,7 +451,6 @@ static int mal_poll(struct napi_struct *napi, int budget) spin_lock_irqsave(&mal->lock, flags); mal_disable_eob_irq(mal); spin_unlock_irqrestore(&mal->lock, flags); - goto again; } mc->ops->poll_tx(mc->dev); } diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 9f2184be55dc..b8778e7b1f79 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1253,6 +1253,7 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter) release_sub_crq_queue(adapter, adapter->tx_scrq[i]); } + kfree(adapter->tx_scrq); adapter->tx_scrq = NULL; } @@ -1265,6 +1266,7 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter) release_sub_crq_queue(adapter, adapter->rx_scrq[i]); } + kfree(adapter->rx_scrq); adapter->rx_scrq = NULL; } diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 9affd7c198bd..6a62447fe377 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -7882,6 +7882,11 @@ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) pci_enable_wake(pdev, PCI_D3hot, 0); pci_enable_wake(pdev, PCI_D3cold, 0); + /* In case of PCI error, adapter lose its HW address + * so we should re-assign it here. + */ + hw->hw_addr = adapter->io_addr; + igb_reset(adapter); wr32(E1000_WUS, ~0); result = PCI_ERS_RESULT_RECOVERED; diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 0a4e81a253fb..ed6fae964ec5 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -4413,13 +4413,12 @@ static void mvpp2_txq_bufs_free(struct mvpp2_port *port, struct mvpp2_txq_pcpu_buf *tx_buf = txq_pcpu->buffs + txq_pcpu->txq_get_index; - mvpp2_txq_inc_get(txq_pcpu); - dma_unmap_single(port->dev->dev.parent, tx_buf->phys, tx_buf->size, DMA_TO_DEVICE); - if (!tx_buf->skb) - continue; - dev_kfree_skb_any(tx_buf->skb); + if (tx_buf->skb) + dev_kfree_skb_any(tx_buf->skb); + + mvpp2_txq_inc_get(txq_pcpu); } } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index a5fc46bbcbe2..d4d97ca12e83 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -88,10 +88,17 @@ void mlx4_en_remove_timestamp(struct mlx4_en_dev *mdev) } } +#define MLX4_EN_WRAP_AROUND_SEC 10UL +/* By scheduling the overflow check every 5 seconds, we have a reasonably + * good chance we wont miss a wrap around. + * TOTO: Use a timer instead of a work queue to increase the guarantee. + */ +#define MLX4_EN_OVERFLOW_PERIOD (MLX4_EN_WRAP_AROUND_SEC * HZ / 2) + void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev) { bool timeout = time_is_before_jiffies(mdev->last_overflow_check + - mdev->overflow_period); + MLX4_EN_OVERFLOW_PERIOD); unsigned long flags; if (timeout) { @@ -236,7 +243,6 @@ static const struct ptp_clock_info mlx4_en_ptp_clock_info = { .enable = mlx4_en_phc_enable, }; -#define MLX4_EN_WRAP_AROUND_SEC 10ULL /* This function calculates the max shift that enables the user range * of MLX4_EN_WRAP_AROUND_SEC values in the cycles register. @@ -261,7 +267,6 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) { struct mlx4_dev *dev = mdev->dev; unsigned long flags; - u64 ns, zero = 0; /* mlx4_en_init_timestamp is called for each netdev. * mdev->ptp_clock is common for all ports, skip initialization if @@ -285,13 +290,6 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) ktime_to_ns(ktime_get_real())); write_unlock_irqrestore(&mdev->clock_lock, flags); - /* Calculate period in seconds to call the overflow watchdog - to make - * sure counter is checked at least once every wrap around. - */ - ns = cyclecounter_cyc2ns(&mdev->cycles, mdev->cycles.mask, zero, &zero); - do_div(ns, NSEC_PER_SEC / 2 / HZ); - mdev->overflow_period = ns; - /* Configure the PHC */ mdev->ptp_clock_info = mlx4_en_ptp_clock_info; snprintf(mdev->ptp_clock_info.name, 16, "mlx4 ptp"); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index ba652d8a2b93..727122de7df0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -841,8 +841,6 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) return -ENOSYS; } - mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz; - dev->caps.hca_core_clock = hca_param.hca_core_clock; memset(&dev_cap, 0, sizeof(dev_cap)); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index a3528dd1e72e..df0f39611c5e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -419,7 +419,6 @@ struct mlx4_en_dev { struct cyclecounter cycles; struct timecounter clock; unsigned long last_overflow_check; - unsigned long overflow_period; struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_clock_info; struct notifier_block nb; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 1806b1fc6e4c..d50350c7adc4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -249,15 +249,14 @@ static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp, } static struct mlxsw_sp_span_entry * -mlxsw_sp_span_entry_find(struct mlxsw_sp_port *port) +mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port) { - struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; int i; for (i = 0; i < mlxsw_sp->span.entries_count; i++) { struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; - if (curr->used && curr->local_port == port->local_port) + if (curr->used && curr->local_port == local_port) return curr; } return NULL; @@ -268,7 +267,8 @@ static struct mlxsw_sp_span_entry { struct mlxsw_sp_span_entry *span_entry; - span_entry = mlxsw_sp_span_entry_find(port); + span_entry = mlxsw_sp_span_entry_find(port->mlxsw_sp, + port->local_port); if (span_entry) { /* Already exists, just take a reference */ span_entry->ref_count++; @@ -453,12 +453,13 @@ err_port_bind: } static void mlxsw_sp_span_mirror_remove(struct mlxsw_sp_port *from, - struct mlxsw_sp_port *to, + u8 destination_port, enum mlxsw_sp_span_type type) { struct mlxsw_sp_span_entry *span_entry; - span_entry = mlxsw_sp_span_entry_find(to); + span_entry = mlxsw_sp_span_entry_find(from->mlxsw_sp, + destination_port); if (!span_entry) { netdev_err(from->dev, "no span entry found\n"); return; @@ -1255,10 +1256,8 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_cls_matchall_offload *cls) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry; enum mlxsw_sp_span_type span_type; - struct mlxsw_sp_port *to_port; mall_tc_entry = mlxsw_sp_port_mirror_entry_find(mlxsw_sp_port, cls->cookie); @@ -1269,11 +1268,12 @@ static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, switch (mall_tc_entry->type) { case MLXSW_SP_PORT_MALL_MIRROR: - to_port = mlxsw_sp->ports[mall_tc_entry->mirror.to_local_port]; span_type = mall_tc_entry->mirror.ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS; - mlxsw_sp_span_mirror_remove(mlxsw_sp_port, to_port, span_type); + mlxsw_sp_span_mirror_remove(mlxsw_sp_port, + mall_tc_entry->mirror.to_local_port, + span_type); break; default: WARN_ON(1); diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 653bb5735f0c..433f8be57847 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -642,7 +642,9 @@ static inline u8 qed_concrete_to_sw_fid(struct qed_dev *cdev, #define OOO_LB_TC 9 int qed_configure_vport_wfq(struct qed_dev *cdev, u16 vp_id, u32 rate); -void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev, u32 min_pf_rate); +void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev, + struct qed_ptt *p_ptt, + u32 min_pf_rate); void qed_clean_wfq_db(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); #define QED_LEADING_HWFN(dev) (&dev->hwfns[0]) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index a4789a93b692..9d59cb85c012 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -1222,7 +1222,7 @@ static struct qed_dcbx_get *qed_dcbnl_get_dcbx(struct qed_hwfn *hwfn, { struct qed_dcbx_get *dcbx_info; - dcbx_info = kzalloc(sizeof(*dcbx_info), GFP_KERNEL); + dcbx_info = kmalloc(sizeof(*dcbx_info), GFP_ATOMIC); if (!dcbx_info) return NULL; diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index edae5fc5fccd..afe5e57d9acb 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -877,7 +877,7 @@ qed_hw_init_pf_doorbell_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) /* Either EDPM is mandatory, or we are attempting to allocate a * WID per CPU. */ - n_cpus = num_active_cpus(); + n_cpus = num_present_cpus(); rc = qed_hw_init_dpi_size(p_hwfn, p_ptt, pwm_regsize, n_cpus); } @@ -2732,7 +2732,8 @@ int qed_configure_vport_wfq(struct qed_dev *cdev, u16 vp_id, u32 rate) } /* API to configure WFQ from mcp link change */ -void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev, u32 min_pf_rate) +void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev, + struct qed_ptt *p_ptt, u32 min_pf_rate) { int i; @@ -2746,8 +2747,7 @@ void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev, u32 min_pf_rate) for_each_hwfn(cdev, i) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; - __qed_configure_vp_wfq_on_link_change(p_hwfn, - p_hwfn->p_dpc_ptt, + __qed_configure_vp_wfq_on_link_change(p_hwfn, p_ptt, min_pf_rate); } } diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index bdc9ba92f6d4..8b7d2f963ee1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -628,7 +628,8 @@ static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn, /* Min bandwidth configuration */ __qed_configure_pf_min_bandwidth(p_hwfn, p_ptt, p_link, min_bw); - qed_configure_vp_wfq_on_link_change(p_hwfn->cdev, p_link->min_pf_rate); + qed_configure_vp_wfq_on_link_change(p_hwfn->cdev, p_ptt, + p_link->min_pf_rate); p_link->an = !!(status & LINK_STATUS_AUTO_NEGOTIATE_ENABLED); p_link->an_complete = !!(status & diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index f3a825a8f8d5..d9dcb0d1714c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -1766,13 +1766,13 @@ static int qed_roce_query_qp(struct qed_hwfn *p_hwfn, if (rc) goto err_resp; - dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_resp_ramrod_res), - p_resp_ramrod_res, resp_ramrod_res_phys); - out_params->rq_psn = le32_to_cpu(p_resp_ramrod_res->psn); rq_err_state = GET_FIELD(le32_to_cpu(p_resp_ramrod_res->err_flag), ROCE_QUERY_QP_RESP_OUTPUT_PARAMS_ERROR_FLG); + dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_resp_ramrod_res), + p_resp_ramrod_res, resp_ramrod_res_phys); + if (!(qp->req_offloaded)) { /* Don't send query qp for the requester */ out_params->sq_psn = qp->sq_psn; @@ -1813,9 +1813,6 @@ static int qed_roce_query_qp(struct qed_hwfn *p_hwfn, if (rc) goto err_req; - dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_req_ramrod_res), - p_req_ramrod_res, req_ramrod_res_phys); - out_params->sq_psn = le32_to_cpu(p_req_ramrod_res->psn); sq_err_state = GET_FIELD(le32_to_cpu(p_req_ramrod_res->flags), ROCE_QUERY_QP_REQ_OUTPUT_PARAMS_ERR_FLG); @@ -1823,6 +1820,9 @@ static int qed_roce_query_qp(struct qed_hwfn *p_hwfn, GET_FIELD(le32_to_cpu(p_req_ramrod_res->flags), ROCE_QUERY_QP_REQ_OUTPUT_PARAMS_SQ_DRAINING_FLG); + dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_req_ramrod_res), + p_req_ramrod_res, req_ramrod_res_phys); + out_params->draining = false; if (rq_err_state) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 7567cc464b88..634e4149af22 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -1221,7 +1221,7 @@ static int qede_selftest_receive_traffic(struct qede_dev *edev) struct qede_rx_queue *rxq = NULL; struct sw_rx_data *sw_rx_data; union eth_rx_cqe *cqe; - int i, rc = 0; + int i, iter, rc = 0; u8 *data_ptr; for_each_queue(i) { @@ -1240,7 +1240,7 @@ static int qede_selftest_receive_traffic(struct qede_dev *edev) * enabled. This is because the queue 0 is configured as the default * queue and that the loopback traffic is not IP. */ - for (i = 0; i < QEDE_SELFTEST_POLL_COUNT; i++) { + for (iter = 0; iter < QEDE_SELFTEST_POLL_COUNT; iter++) { if (!qede_has_rx_work(rxq)) { usleep_range(100, 200); continue; @@ -1287,7 +1287,7 @@ static int qede_selftest_receive_traffic(struct qede_dev *edev) qed_chain_recycle_consumed(&rxq->rx_comp_ring); } - if (i == QEDE_SELFTEST_POLL_COUNT) { + if (iter == QEDE_SELFTEST_POLL_COUNT) { DP_NOTICE(edev, "Failed to receive the traffic\n"); return -1; } diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index 0b4deb31e742..f683bfbd9986 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -932,7 +932,8 @@ static void emac_mac_rx_descs_refill(struct emac_adapter *adpt, curr_rxbuf->dma_addr = dma_map_single(adpt->netdev->dev.parent, skb->data, - curr_rxbuf->length, DMA_FROM_DEVICE); + adpt->rxbuf_size, DMA_FROM_DEVICE); + ret = dma_mapping_error(adpt->netdev->dev.parent, curr_rxbuf->dma_addr); if (ret) { diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h index d050f37f3e0f..5024280f5af2 100644 --- a/drivers/net/ethernet/renesas/sh_eth.h +++ b/drivers/net/ethernet/renesas/sh_eth.h @@ -339,7 +339,7 @@ enum FELIC_MODE_BIT { ECMR_DPAD = 0x00200000, ECMR_RZPF = 0x00100000, ECMR_ZPF = 0x00080000, ECMR_PFR = 0x00040000, ECMR_RXF = 0x00020000, ECMR_TXF = 0x00010000, ECMR_MCT = 0x00002000, ECMR_PRCEF = 0x00001000, - ECMR_PMDE = 0x00000200, ECMR_RE = 0x00000040, ECMR_TE = 0x00000020, + ECMR_MPDE = 0x00000200, ECMR_RE = 0x00000040, ECMR_TE = 0x00000020, ECMR_RTM = 0x00000010, ECMR_ILB = 0x00000008, ECMR_ELB = 0x00000004, ECMR_DM = 0x00000002, ECMR_PRM = 0x00000001, }; diff --git a/drivers/net/ethernet/rocker/rocker_tlv.h b/drivers/net/ethernet/rocker/rocker_tlv.h index a63ef82e7c72..dfae3c9d57c6 100644 --- a/drivers/net/ethernet/rocker/rocker_tlv.h +++ b/drivers/net/ethernet/rocker/rocker_tlv.h @@ -139,40 +139,52 @@ rocker_tlv_start(struct rocker_desc_info *desc_info) int rocker_tlv_put(struct rocker_desc_info *desc_info, int attrtype, int attrlen, const void *data); -static inline int rocker_tlv_put_u8(struct rocker_desc_info *desc_info, - int attrtype, u8 value) +static inline int +rocker_tlv_put_u8(struct rocker_desc_info *desc_info, int attrtype, u8 value) { - return rocker_tlv_put(desc_info, attrtype, sizeof(u8), &value); + u8 tmp = value; /* work around GCC PR81715 */ + + return rocker_tlv_put(desc_info, attrtype, sizeof(u8), &tmp); } -static inline int rocker_tlv_put_u16(struct rocker_desc_info *desc_info, - int attrtype, u16 value) +static inline int +rocker_tlv_put_u16(struct rocker_desc_info *desc_info, int attrtype, u16 value) { - return rocker_tlv_put(desc_info, attrtype, sizeof(u16), &value); + u16 tmp = value; + + return rocker_tlv_put(desc_info, attrtype, sizeof(u16), &tmp); } -static inline int rocker_tlv_put_be16(struct rocker_desc_info *desc_info, - int attrtype, __be16 value) +static inline int +rocker_tlv_put_be16(struct rocker_desc_info *desc_info, int attrtype, __be16 value) { - return rocker_tlv_put(desc_info, attrtype, sizeof(__be16), &value); + __be16 tmp = value; + + return rocker_tlv_put(desc_info, attrtype, sizeof(__be16), &tmp); } -static inline int rocker_tlv_put_u32(struct rocker_desc_info *desc_info, - int attrtype, u32 value) +static inline int +rocker_tlv_put_u32(struct rocker_desc_info *desc_info, int attrtype, u32 value) { - return rocker_tlv_put(desc_info, attrtype, sizeof(u32), &value); + u32 tmp = value; + + return rocker_tlv_put(desc_info, attrtype, sizeof(u32), &tmp); } -static inline int rocker_tlv_put_be32(struct rocker_desc_info *desc_info, - int attrtype, __be32 value) +static inline int +rocker_tlv_put_be32(struct rocker_desc_info *desc_info, int attrtype, __be32 value) { - return rocker_tlv_put(desc_info, attrtype, sizeof(__be32), &value); + __be32 tmp = value; + + return rocker_tlv_put(desc_info, attrtype, sizeof(__be32), &tmp); } -static inline int rocker_tlv_put_u64(struct rocker_desc_info *desc_info, - int attrtype, u64 value) +static inline int +rocker_tlv_put_u64(struct rocker_desc_info *desc_info, int attrtype, u64 value) { - return rocker_tlv_put(desc_info, attrtype, sizeof(u64), &value); + u64 tmp = value; + + return rocker_tlv_put(desc_info, attrtype, sizeof(u64), &tmp); } static inline struct rocker_tlv * diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index c4ada7227f40..1d85109cb8ed 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -197,11 +197,15 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx) nic_data->datapath_caps = MCDI_DWORD(outbuf, GET_CAPABILITIES_OUT_FLAGS1); - if (outlen >= MC_CMD_GET_CAPABILITIES_V2_OUT_LEN) + if (outlen >= MC_CMD_GET_CAPABILITIES_V2_OUT_LEN) { nic_data->datapath_caps2 = MCDI_DWORD(outbuf, GET_CAPABILITIES_V2_OUT_FLAGS2); - else + nic_data->piobuf_size = MCDI_WORD(outbuf, + GET_CAPABILITIES_V2_OUT_SIZE_PIO_BUFF); + } else { nic_data->datapath_caps2 = 0; + nic_data->piobuf_size = ER_DZ_TX_PIOBUF_SIZE; + } /* record the DPCPU firmware IDs to determine VEB vswitching support. */ @@ -825,8 +829,8 @@ static int efx_ef10_link_piobufs(struct efx_nic *efx) offset = ((efx->tx_channel_offset + efx->n_tx_channels - tx_queue->channel->channel - 1) * efx_piobuf_size); - index = offset / ER_DZ_TX_PIOBUF_SIZE; - offset = offset % ER_DZ_TX_PIOBUF_SIZE; + index = offset / nic_data->piobuf_size; + offset = offset % nic_data->piobuf_size; /* When the host page size is 4K, the first * host page in the WC mapping may be within @@ -1161,11 +1165,11 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx) * functions of the controller. */ if (efx_piobuf_size != 0 && - ER_DZ_TX_PIOBUF_SIZE / efx_piobuf_size * EF10_TX_PIOBUF_COUNT >= + nic_data->piobuf_size / efx_piobuf_size * EF10_TX_PIOBUF_COUNT >= efx->n_tx_channels) { unsigned int n_piobufs = DIV_ROUND_UP(efx->n_tx_channels, - ER_DZ_TX_PIOBUF_SIZE / efx_piobuf_size); + nic_data->piobuf_size / efx_piobuf_size); rc = efx_ef10_alloc_piobufs(efx, n_piobufs); if (rc) diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index 73bee7ea332a..73028f21a2d7 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -500,6 +500,7 @@ enum { * @pio_write_base: Base address for writing PIO buffers * @pio_write_vi_base: Relative VI number for @pio_write_base * @piobuf_handle: Handle of each PIO buffer allocated + * @piobuf_size: size of a single PIO buffer * @must_restore_piobufs: Flag: PIO buffers have yet to be restored after MC * reboot * @rx_rss_context: Firmware handle for our RSS context @@ -537,6 +538,7 @@ struct efx_ef10_nic_data { void __iomem *wc_membase, *pio_write_base; unsigned int pio_write_vi_base; unsigned int piobuf_handle[EF10_TX_PIOBUF_COUNT]; + u16 piobuf_size; bool must_restore_piobufs; u32 rx_rss_context; bool rx_rss_context_exclusive; diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 233778911557..6f26acd0aa61 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -27,7 +27,6 @@ #ifdef EFX_USE_PIO -#define EFX_PIOBUF_SIZE_MAX ER_DZ_TX_PIOBUF_SIZE #define EFX_PIOBUF_SIZE_DEF ALIGN(256, L1_CACHE_BYTES) unsigned int efx_piobuf_size __read_mostly = EFX_PIOBUF_SIZE_DEF; diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c index d15dd3938ba8..2e5150b0b8d5 100644 --- a/drivers/net/phy/xilinx_gmii2rgmii.c +++ b/drivers/net/phy/xilinx_gmii2rgmii.c @@ -44,7 +44,7 @@ static int xgmiitorgmii_read_status(struct phy_device *phydev) priv->phy_drv->read_status(phydev); val = mdiobus_read(phydev->mdio.bus, priv->addr, XILINX_GMII2RGMII_REG); - val &= XILINX_GMII2RGMII_SPEED_MASK; + val &= ~XILINX_GMII2RGMII_SPEED_MASK; if (phydev->speed == SPEED_1000) val |= BMCR_SPEED1000; diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index a380649bf6b5..26681707fc7a 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2366,8 +2366,10 @@ start_again: hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags | NLM_F_MULTI, TEAM_CMD_OPTIONS_GET); - if (!hdr) + if (!hdr) { + nlmsg_free(skb); return -EMSGSIZE; + } if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex)) goto nla_put_failure; @@ -2639,8 +2641,10 @@ start_again: hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags | NLM_F_MULTI, TEAM_CMD_PORT_LIST_GET); - if (!hdr) + if (!hdr) { + nlmsg_free(skb); return -EMSGSIZE; + } if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex)) goto nla_put_failure; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 4b7a363448a2..1c4a3d1f01db 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1279,11 +1279,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, switch (tun->flags & TUN_TYPE_MASK) { case IFF_TUN: if (tun->flags & IFF_NO_PI) { - switch (skb->data[0] & 0xf0) { - case 0x40: + u8 ip_version = skb->len ? (skb->data[0] >> 4) : 0; + + switch (ip_version) { + case 4: pi.proto = htons(ETH_P_IP); break; - case 0x60: + case 6: pi.proto = htons(ETH_P_IPV6); break; default: diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index cdde59089f72..3a7286256db0 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -364,7 +364,7 @@ config USB_NET_NET1080 optionally with LEDs that indicate traffic config USB_NET_PLUSB - tristate "Prolific PL-2301/2302/25A1 based cables" + tristate "Prolific PL-2301/2302/25A1/27A1 based cables" # if the handshake/init/reset problems, from original 'plusb', # are ever resolved ... then remove "experimental" depends on USB_USBNET diff --git a/drivers/net/usb/plusb.c b/drivers/net/usb/plusb.c index 22e1a9a99a7d..6fe59373cba9 100644 --- a/drivers/net/usb/plusb.c +++ b/drivers/net/usb/plusb.c @@ -102,7 +102,7 @@ static int pl_reset(struct usbnet *dev) } static const struct driver_info prolific_info = { - .description = "Prolific PL-2301/PL-2302/PL-25A1", + .description = "Prolific PL-2301/PL-2302/PL-25A1/PL-27A1", .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT, /* some PL-2302 versions seem to fail usb_set_interface() */ .reset = pl_reset, @@ -139,6 +139,17 @@ static const struct usb_device_id products [] = { * Host-to-Host Cable */ .driver_info = (unsigned long) &prolific_info, + +}, + +/* super speed cables */ +{ + USB_DEVICE(0x067b, 0x27a1), /* PL-27A1, no eeprom + * also: goobay Active USB 3.0 + * Data Link, + * Unitek Y-3501 + */ + .driver_info = (unsigned long) &prolific_info, }, { }, // END @@ -158,5 +169,5 @@ static struct usb_driver plusb_driver = { module_usb_driver(plusb_driver); MODULE_AUTHOR("David Brownell"); -MODULE_DESCRIPTION("Prolific PL-2301/2302/25A1 USB Host to Host Link Driver"); +MODULE_DESCRIPTION("Prolific PL-2301/2302/25A1/27A1 USB Host to Host Link Driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index be5b527472f9..90c0c4a7175d 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -314,6 +314,7 @@ struct ath10k_peer { struct ieee80211_vif *vif; struct ieee80211_sta *sta; + bool removed; int vdev_id; u8 addr[ETH_ALEN]; DECLARE_BITMAP(peer_ids, ATH10K_MAX_NUM_PEER_IDS); diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index f2e85eb22afe..30e98afa2e68 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -3738,6 +3738,9 @@ struct ieee80211_txq *ath10k_mac_txq_lookup(struct ath10k *ar, if (!peer) return NULL; + if (peer->removed) + return NULL; + if (peer->sta) return peer->sta->txq[tid]; else if (peer->vif) @@ -7422,6 +7425,20 @@ ath10k_mac_op_switch_vif_chanctx(struct ieee80211_hw *hw, return 0; } +static void ath10k_mac_op_sta_pre_rcu_remove(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta) +{ + struct ath10k *ar; + struct ath10k_peer *peer; + + ar = hw->priv; + + list_for_each_entry(peer, &ar->peers, list) + if (peer->sta == sta) + peer->removed = true; +} + static const struct ieee80211_ops ath10k_ops = { .tx = ath10k_mac_op_tx, .wake_tx_queue = ath10k_mac_op_wake_tx_queue, @@ -7462,6 +7479,7 @@ static const struct ieee80211_ops ath10k_ops = { .assign_vif_chanctx = ath10k_mac_op_assign_vif_chanctx, .unassign_vif_chanctx = ath10k_mac_op_unassign_vif_chanctx, .switch_vif_chanctx = ath10k_mac_op_switch_vif_chanctx, + .sta_pre_rcu_remove = ath10k_mac_op_sta_pre_rcu_remove, CFG80211_TESTMODE_CMD(ath10k_tm_cmd) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 1d4352e1ac81..27960b0bfbcd 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -978,7 +978,7 @@ static void brcmf_escan_prep(struct brcmf_cfg80211_info *cfg, eth_broadcast_addr(params_le->bssid); params_le->bss_type = DOT11_BSSTYPE_ANY; - params_le->scan_type = 0; + params_le->scan_type = BRCMF_SCANTYPE_ACTIVE; params_le->channel_num = 0; params_le->nprobes = cpu_to_le32(-1); params_le->active_time = cpu_to_le32(-1); @@ -986,12 +986,9 @@ static void brcmf_escan_prep(struct brcmf_cfg80211_info *cfg, params_le->home_time = cpu_to_le32(-1); memset(¶ms_le->ssid_le, 0, sizeof(params_le->ssid_le)); - /* if request is null exit so it will be all channel broadcast scan */ - if (!request) - return; - n_ssids = request->n_ssids; n_channels = request->n_channels; + /* Copy channel array if applicable */ brcmf_dbg(SCAN, "### List of channelspecs to scan ### %d\n", n_channels); @@ -1028,16 +1025,8 @@ static void brcmf_escan_prep(struct brcmf_cfg80211_info *cfg, ptr += sizeof(ssid_le); } } else { - brcmf_dbg(SCAN, "Broadcast scan %p\n", request->ssids); - if ((request->ssids) && request->ssids->ssid_len) { - brcmf_dbg(SCAN, "SSID %s len=%d\n", - params_le->ssid_le.SSID, - request->ssids->ssid_len); - params_le->ssid_le.SSID_len = - cpu_to_le32(request->ssids->ssid_len); - memcpy(¶ms_le->ssid_le.SSID, request->ssids->ssid, - request->ssids->ssid_len); - } + brcmf_dbg(SCAN, "Performing passive scan\n"); + params_le->scan_type = BRCMF_SCANTYPE_PASSIVE; } /* Adding mask to channel numbers */ params_le->channel_num = @@ -3097,6 +3086,7 @@ brcmf_cfg80211_escan_handler(struct brcmf_if *ifp, struct brcmf_cfg80211_info *cfg = ifp->drvr->config; s32 status; struct brcmf_escan_result_le *escan_result_le; + u32 escan_buflen; struct brcmf_bss_info_le *bss_info_le; struct brcmf_bss_info_le *bss = NULL; u32 bi_length; @@ -3113,11 +3103,23 @@ brcmf_cfg80211_escan_handler(struct brcmf_if *ifp, if (status == BRCMF_E_STATUS_PARTIAL) { brcmf_dbg(SCAN, "ESCAN Partial result\n"); + if (e->datalen < sizeof(*escan_result_le)) { + brcmf_err("invalid event data length\n"); + goto exit; + } escan_result_le = (struct brcmf_escan_result_le *) data; if (!escan_result_le) { brcmf_err("Invalid escan result (NULL pointer)\n"); goto exit; } + escan_buflen = le32_to_cpu(escan_result_le->buflen); + if (escan_buflen > BRCMF_ESCAN_BUF_SIZE || + escan_buflen > e->datalen || + escan_buflen < sizeof(*escan_result_le)) { + brcmf_err("Invalid escan buffer length: %d\n", + escan_buflen); + goto exit; + } if (le16_to_cpu(escan_result_le->bss_count) != 1) { brcmf_err("Invalid bss_count %d: ignoring\n", escan_result_le->bss_count); @@ -3134,9 +3136,8 @@ brcmf_cfg80211_escan_handler(struct brcmf_if *ifp, } bi_length = le32_to_cpu(bss_info_le->length); - if (bi_length != (le32_to_cpu(escan_result_le->buflen) - - WL_ESCAN_RESULTS_FIXED_SIZE)) { - brcmf_err("Invalid bss_info length %d: ignoring\n", + if (bi_length != escan_buflen - WL_ESCAN_RESULTS_FIXED_SIZE) { + brcmf_err("Ignoring invalid bss_info length: %d\n", bi_length); goto exit; } diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c index 79c081fd560f..6afcf86b9ba2 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c @@ -429,7 +429,8 @@ void brcmf_fweh_process_event(struct brcmf_pub *drvr, if (code != BRCMF_E_IF && !fweh->evt_handler[code]) return; - if (datalen > BRCMF_DCMD_MAXLEN) + if (datalen > BRCMF_DCMD_MAXLEN || + datalen + sizeof(*event_packet) > packet_len) return; if (in_interrupt()) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h index a4118c0ef6ca..59013572fbe3 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h @@ -45,6 +45,11 @@ #define BRCMF_SCAN_PARAMS_COUNT_MASK 0x0000ffff #define BRCMF_SCAN_PARAMS_NSSID_SHIFT 16 +/* scan type definitions */ +#define BRCMF_SCANTYPE_DEFAULT 0xFF +#define BRCMF_SCANTYPE_ACTIVE 0 +#define BRCMF_SCANTYPE_PASSIVE 1 + /* primary (ie tx) key */ #define BRCMF_PRIMARY_KEY (1 << 1) #define DOT11_BSSTYPE_ANY 2 diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c index b3aab2fe96eb..ef685465f80a 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c @@ -14764,8 +14764,8 @@ static void wlc_phy_ipa_restore_tx_digi_filts_nphy(struct brcms_phy *pi) } static void -wlc_phy_set_rfseq_nphy(struct brcms_phy *pi, u8 cmd, u8 *events, u8 *dlys, - u8 len) +wlc_phy_set_rfseq_nphy(struct brcms_phy *pi, u8 cmd, const u8 *events, + const u8 *dlys, u8 len) { u32 t1_offset, t2_offset; u8 ctr; @@ -15240,16 +15240,16 @@ static void wlc_phy_workarounds_nphy_gainctrl_2057_rev5(struct brcms_phy *pi) static void wlc_phy_workarounds_nphy_gainctrl_2057_rev6(struct brcms_phy *pi) { u16 currband; - s8 lna1G_gain_db_rev7[] = { 9, 14, 19, 24 }; - s8 *lna1_gain_db = NULL; - s8 *lna1_gain_db_2 = NULL; - s8 *lna2_gain_db = NULL; - s8 tiaA_gain_db_rev7[] = { -9, -6, -3, 0, 3, 3, 3, 3, 3, 3 }; - s8 *tia_gain_db; - s8 tiaA_gainbits_rev7[] = { 0, 1, 2, 3, 4, 4, 4, 4, 4, 4 }; - s8 *tia_gainbits; - u16 rfseqA_init_gain_rev7[] = { 0x624f, 0x624f }; - u16 *rfseq_init_gain; + static const s8 lna1G_gain_db_rev7[] = { 9, 14, 19, 24 }; + const s8 *lna1_gain_db = NULL; + const s8 *lna1_gain_db_2 = NULL; + const s8 *lna2_gain_db = NULL; + static const s8 tiaA_gain_db_rev7[] = { -9, -6, -3, 0, 3, 3, 3, 3, 3, 3 }; + const s8 *tia_gain_db; + static const s8 tiaA_gainbits_rev7[] = { 0, 1, 2, 3, 4, 4, 4, 4, 4, 4 }; + const s8 *tia_gainbits; + static const u16 rfseqA_init_gain_rev7[] = { 0x624f, 0x624f }; + const u16 *rfseq_init_gain; u16 init_gaincode; u16 clip1hi_gaincode; u16 clip1md_gaincode = 0; @@ -15310,10 +15310,9 @@ static void wlc_phy_workarounds_nphy_gainctrl_2057_rev6(struct brcms_phy *pi) if ((freq <= 5080) || (freq == 5825)) { - s8 lna1A_gain_db_rev7[] = { 11, 16, 20, 24 }; - s8 lna1A_gain_db_2_rev7[] = { - 11, 17, 22, 25}; - s8 lna2A_gain_db_rev7[] = { -1, 6, 10, 14 }; + static const s8 lna1A_gain_db_rev7[] = { 11, 16, 20, 24 }; + static const s8 lna1A_gain_db_2_rev7[] = { 11, 17, 22, 25}; + static const s8 lna2A_gain_db_rev7[] = { -1, 6, 10, 14 }; crsminu_th = 0x3e; lna1_gain_db = lna1A_gain_db_rev7; @@ -15321,10 +15320,9 @@ static void wlc_phy_workarounds_nphy_gainctrl_2057_rev6(struct brcms_phy *pi) lna2_gain_db = lna2A_gain_db_rev7; } else if ((freq >= 5500) && (freq <= 5700)) { - s8 lna1A_gain_db_rev7[] = { 11, 17, 21, 25 }; - s8 lna1A_gain_db_2_rev7[] = { - 12, 18, 22, 26}; - s8 lna2A_gain_db_rev7[] = { 1, 8, 12, 16 }; + static const s8 lna1A_gain_db_rev7[] = { 11, 17, 21, 25 }; + static const s8 lna1A_gain_db_2_rev7[] = { 12, 18, 22, 26}; + static const s8 lna2A_gain_db_rev7[] = { 1, 8, 12, 16 }; crsminu_th = 0x45; clip1md_gaincode_B = 0x14; @@ -15335,10 +15333,9 @@ static void wlc_phy_workarounds_nphy_gainctrl_2057_rev6(struct brcms_phy *pi) lna2_gain_db = lna2A_gain_db_rev7; } else { - s8 lna1A_gain_db_rev7[] = { 12, 18, 22, 26 }; - s8 lna1A_gain_db_2_rev7[] = { - 12, 18, 22, 26}; - s8 lna2A_gain_db_rev7[] = { -1, 6, 10, 14 }; + static const s8 lna1A_gain_db_rev7[] = { 12, 18, 22, 26 }; + static const s8 lna1A_gain_db_2_rev7[] = { 12, 18, 22, 26}; + static const s8 lna2A_gain_db_rev7[] = { -1, 6, 10, 14 }; crsminu_th = 0x41; lna1_gain_db = lna1A_gain_db_rev7; @@ -15450,65 +15447,65 @@ static void wlc_phy_workarounds_nphy_gainctrl(struct brcms_phy *pi) NPHY_RFSEQ_CMD_CLR_HIQ_DIS, NPHY_RFSEQ_CMD_SET_HPF_BW }; - u8 rfseq_updategainu_dlys[] = { 10, 30, 1 }; - s8 lna1G_gain_db[] = { 7, 11, 16, 23 }; - s8 lna1G_gain_db_rev4[] = { 8, 12, 17, 25 }; - s8 lna1G_gain_db_rev5[] = { 9, 13, 18, 26 }; - s8 lna1G_gain_db_rev6[] = { 8, 13, 18, 25 }; - s8 lna1G_gain_db_rev6_224B0[] = { 10, 14, 19, 27 }; - s8 lna1A_gain_db[] = { 7, 11, 17, 23 }; - s8 lna1A_gain_db_rev4[] = { 8, 12, 18, 23 }; - s8 lna1A_gain_db_rev5[] = { 6, 10, 16, 21 }; - s8 lna1A_gain_db_rev6[] = { 6, 10, 16, 21 }; - s8 *lna1_gain_db = NULL; - s8 lna2G_gain_db[] = { -5, 6, 10, 14 }; - s8 lna2G_gain_db_rev5[] = { -3, 7, 11, 16 }; - s8 lna2G_gain_db_rev6[] = { -5, 6, 10, 14 }; - s8 lna2G_gain_db_rev6_224B0[] = { -5, 6, 10, 15 }; - s8 lna2A_gain_db[] = { -6, 2, 6, 10 }; - s8 lna2A_gain_db_rev4[] = { -5, 2, 6, 10 }; - s8 lna2A_gain_db_rev5[] = { -7, 0, 4, 8 }; - s8 lna2A_gain_db_rev6[] = { -7, 0, 4, 8 }; - s8 *lna2_gain_db = NULL; - s8 tiaG_gain_db[] = { + static const u8 rfseq_updategainu_dlys[] = { 10, 30, 1 }; + static const s8 lna1G_gain_db[] = { 7, 11, 16, 23 }; + static const s8 lna1G_gain_db_rev4[] = { 8, 12, 17, 25 }; + static const s8 lna1G_gain_db_rev5[] = { 9, 13, 18, 26 }; + static const s8 lna1G_gain_db_rev6[] = { 8, 13, 18, 25 }; + static const s8 lna1G_gain_db_rev6_224B0[] = { 10, 14, 19, 27 }; + static const s8 lna1A_gain_db[] = { 7, 11, 17, 23 }; + static const s8 lna1A_gain_db_rev4[] = { 8, 12, 18, 23 }; + static const s8 lna1A_gain_db_rev5[] = { 6, 10, 16, 21 }; + static const s8 lna1A_gain_db_rev6[] = { 6, 10, 16, 21 }; + const s8 *lna1_gain_db = NULL; + static const s8 lna2G_gain_db[] = { -5, 6, 10, 14 }; + static const s8 lna2G_gain_db_rev5[] = { -3, 7, 11, 16 }; + static const s8 lna2G_gain_db_rev6[] = { -5, 6, 10, 14 }; + static const s8 lna2G_gain_db_rev6_224B0[] = { -5, 6, 10, 15 }; + static const s8 lna2A_gain_db[] = { -6, 2, 6, 10 }; + static const s8 lna2A_gain_db_rev4[] = { -5, 2, 6, 10 }; + static const s8 lna2A_gain_db_rev5[] = { -7, 0, 4, 8 }; + static const s8 lna2A_gain_db_rev6[] = { -7, 0, 4, 8 }; + const s8 *lna2_gain_db = NULL; + static const s8 tiaG_gain_db[] = { 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A }; - s8 tiaA_gain_db[] = { + static const s8 tiaA_gain_db[] = { 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13 }; - s8 tiaA_gain_db_rev4[] = { + static const s8 tiaA_gain_db_rev4[] = { 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d }; - s8 tiaA_gain_db_rev5[] = { + static const s8 tiaA_gain_db_rev5[] = { 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d }; - s8 tiaA_gain_db_rev6[] = { + static const s8 tiaA_gain_db_rev6[] = { 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d }; - s8 *tia_gain_db; - s8 tiaG_gainbits[] = { + const s8 *tia_gain_db; + static const s8 tiaG_gainbits[] = { 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03 }; - s8 tiaA_gainbits[] = { + static const s8 tiaA_gainbits[] = { 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06 }; - s8 tiaA_gainbits_rev4[] = { + static const s8 tiaA_gainbits_rev4[] = { 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04 }; - s8 tiaA_gainbits_rev5[] = { + static const s8 tiaA_gainbits_rev5[] = { 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04 }; - s8 tiaA_gainbits_rev6[] = { + static const s8 tiaA_gainbits_rev6[] = { 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04 }; - s8 *tia_gainbits; - s8 lpf_gain_db[] = { 0x00, 0x06, 0x0c, 0x12, 0x12, 0x12 }; - s8 lpf_gainbits[] = { 0x00, 0x01, 0x02, 0x03, 0x03, 0x03 }; - u16 rfseqG_init_gain[] = { 0x613f, 0x613f, 0x613f, 0x613f }; - u16 rfseqG_init_gain_rev4[] = { 0x513f, 0x513f, 0x513f, 0x513f }; - u16 rfseqG_init_gain_rev5[] = { 0x413f, 0x413f, 0x413f, 0x413f }; - u16 rfseqG_init_gain_rev5_elna[] = { + const s8 *tia_gainbits; + static const s8 lpf_gain_db[] = { 0x00, 0x06, 0x0c, 0x12, 0x12, 0x12 }; + static const s8 lpf_gainbits[] = { 0x00, 0x01, 0x02, 0x03, 0x03, 0x03 }; + static const u16 rfseqG_init_gain[] = { 0x613f, 0x613f, 0x613f, 0x613f }; + static const u16 rfseqG_init_gain_rev4[] = { 0x513f, 0x513f, 0x513f, 0x513f }; + static const u16 rfseqG_init_gain_rev5[] = { 0x413f, 0x413f, 0x413f, 0x413f }; + static const u16 rfseqG_init_gain_rev5_elna[] = { 0x013f, 0x013f, 0x013f, 0x013f }; - u16 rfseqG_init_gain_rev6[] = { 0x513f, 0x513f }; - u16 rfseqG_init_gain_rev6_224B0[] = { 0x413f, 0x413f }; - u16 rfseqG_init_gain_rev6_elna[] = { 0x113f, 0x113f }; - u16 rfseqA_init_gain[] = { 0x516f, 0x516f, 0x516f, 0x516f }; - u16 rfseqA_init_gain_rev4[] = { 0x614f, 0x614f, 0x614f, 0x614f }; - u16 rfseqA_init_gain_rev4_elna[] = { + static const u16 rfseqG_init_gain_rev6[] = { 0x513f, 0x513f }; + static const u16 rfseqG_init_gain_rev6_224B0[] = { 0x413f, 0x413f }; + static const u16 rfseqG_init_gain_rev6_elna[] = { 0x113f, 0x113f }; + static const u16 rfseqA_init_gain[] = { 0x516f, 0x516f, 0x516f, 0x516f }; + static const u16 rfseqA_init_gain_rev4[] = { 0x614f, 0x614f, 0x614f, 0x614f }; + static const u16 rfseqA_init_gain_rev4_elna[] = { 0x314f, 0x314f, 0x314f, 0x314f }; - u16 rfseqA_init_gain_rev5[] = { 0x714f, 0x714f, 0x714f, 0x714f }; - u16 rfseqA_init_gain_rev6[] = { 0x714f, 0x714f }; - u16 *rfseq_init_gain; + static const u16 rfseqA_init_gain_rev5[] = { 0x714f, 0x714f, 0x714f, 0x714f }; + static const u16 rfseqA_init_gain_rev6[] = { 0x714f, 0x714f }; + const u16 *rfseq_init_gain; u16 initG_gaincode = 0x627e; u16 initG_gaincode_rev4 = 0x527e; u16 initG_gaincode_rev5 = 0x427e; @@ -15538,10 +15535,10 @@ static void wlc_phy_workarounds_nphy_gainctrl(struct brcms_phy *pi) u16 clip1mdA_gaincode_rev6 = 0x2084; u16 clip1md_gaincode = 0; u16 clip1loG_gaincode = 0x0074; - u16 clip1loG_gaincode_rev5[] = { + static const u16 clip1loG_gaincode_rev5[] = { 0x0062, 0x0064, 0x006a, 0x106a, 0x106c, 0x1074, 0x107c, 0x207c }; - u16 clip1loG_gaincode_rev6[] = { + static const u16 clip1loG_gaincode_rev6[] = { 0x106a, 0x106c, 0x1074, 0x107c, 0x007e, 0x107e, 0x207e, 0x307e }; u16 clip1loG_gaincode_rev6_224B0 = 0x1074; @@ -16066,7 +16063,7 @@ static void wlc_phy_workarounds_nphy_gainctrl(struct brcms_phy *pi) static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) { - u8 rfseq_rx2tx_events[] = { + static const u8 rfseq_rx2tx_events[] = { NPHY_RFSEQ_CMD_NOP, NPHY_RFSEQ_CMD_RXG_FBW, NPHY_RFSEQ_CMD_TR_SWITCH, @@ -16076,7 +16073,7 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) NPHY_RFSEQ_CMD_EXT_PA }; u8 rfseq_rx2tx_dlys[] = { 8, 6, 6, 2, 4, 60, 1 }; - u8 rfseq_tx2rx_events[] = { + static const u8 rfseq_tx2rx_events[] = { NPHY_RFSEQ_CMD_NOP, NPHY_RFSEQ_CMD_EXT_PA, NPHY_RFSEQ_CMD_TX_GAIN, @@ -16085,8 +16082,8 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) NPHY_RFSEQ_CMD_RXG_FBW, NPHY_RFSEQ_CMD_CLR_HIQ_DIS }; - u8 rfseq_tx2rx_dlys[] = { 8, 6, 2, 4, 4, 6, 1 }; - u8 rfseq_tx2rx_events_rev3[] = { + static const u8 rfseq_tx2rx_dlys[] = { 8, 6, 2, 4, 4, 6, 1 }; + static const u8 rfseq_tx2rx_events_rev3[] = { NPHY_REV3_RFSEQ_CMD_EXT_PA, NPHY_REV3_RFSEQ_CMD_INT_PA_PU, NPHY_REV3_RFSEQ_CMD_TX_GAIN, @@ -16096,7 +16093,7 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) NPHY_REV3_RFSEQ_CMD_CLR_HIQ_DIS, NPHY_REV3_RFSEQ_CMD_END }; - u8 rfseq_tx2rx_dlys_rev3[] = { 8, 4, 2, 2, 4, 4, 6, 1 }; + static const u8 rfseq_tx2rx_dlys_rev3[] = { 8, 4, 2, 2, 4, 4, 6, 1 }; u8 rfseq_rx2tx_events_rev3[] = { NPHY_REV3_RFSEQ_CMD_NOP, NPHY_REV3_RFSEQ_CMD_RXG_FBW, @@ -16110,7 +16107,7 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) }; u8 rfseq_rx2tx_dlys_rev3[] = { 8, 6, 6, 4, 4, 18, 42, 1, 1 }; - u8 rfseq_rx2tx_events_rev3_ipa[] = { + static const u8 rfseq_rx2tx_events_rev3_ipa[] = { NPHY_REV3_RFSEQ_CMD_NOP, NPHY_REV3_RFSEQ_CMD_RXG_FBW, NPHY_REV3_RFSEQ_CMD_TR_SWITCH, @@ -16121,15 +16118,15 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) NPHY_REV3_RFSEQ_CMD_INT_PA_PU, NPHY_REV3_RFSEQ_CMD_END }; - u8 rfseq_rx2tx_dlys_rev3_ipa[] = { 8, 6, 6, 4, 4, 16, 43, 1, 1 }; - u16 rfseq_rx2tx_dacbufpu_rev7[] = { 0x10f, 0x10f }; + static const u8 rfseq_rx2tx_dlys_rev3_ipa[] = { 8, 6, 6, 4, 4, 16, 43, 1, 1 }; + static const u16 rfseq_rx2tx_dacbufpu_rev7[] = { 0x10f, 0x10f }; s16 alpha0, alpha1, alpha2; s16 beta0, beta1, beta2; u32 leg_data_weights, ht_data_weights, nss1_data_weights, stbc_data_weights; u8 chan_freq_range = 0; - u16 dac_control = 0x0002; + static const u16 dac_control = 0x0002; u16 aux_adc_vmid_rev7_core0[] = { 0x8e, 0x96, 0x96, 0x96 }; u16 aux_adc_vmid_rev7_core1[] = { 0x8f, 0x9f, 0x9f, 0x96 }; u16 aux_adc_vmid_rev4[] = { 0xa2, 0xb4, 0xb4, 0x89 }; @@ -16139,8 +16136,8 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) u16 aux_adc_gain_rev4[] = { 0x02, 0x02, 0x02, 0x00 }; u16 aux_adc_gain_rev3[] = { 0x02, 0x02, 0x02, 0x00 }; u16 *aux_adc_gain; - u16 sk_adc_vmid[] = { 0xb4, 0xb4, 0xb4, 0x24 }; - u16 sk_adc_gain[] = { 0x02, 0x02, 0x02, 0x02 }; + static const u16 sk_adc_vmid[] = { 0xb4, 0xb4, 0xb4, 0x24 }; + static const u16 sk_adc_gain[] = { 0x02, 0x02, 0x02, 0x02 }; s32 min_nvar_val = 0x18d; s32 min_nvar_offset_6mbps = 20; u8 pdetrange; @@ -16151,9 +16148,9 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) u16 rfseq_rx2tx_lpf_h_hpc_rev7 = 0x77; u16 rfseq_tx2rx_lpf_h_hpc_rev7 = 0x77; u16 rfseq_pktgn_lpf_h_hpc_rev7 = 0x77; - u16 rfseq_htpktgn_lpf_hpc_rev7[] = { 0x77, 0x11, 0x11 }; - u16 rfseq_pktgn_lpf_hpc_rev7[] = { 0x11, 0x11 }; - u16 rfseq_cckpktgn_lpf_hpc_rev7[] = { 0x11, 0x11 }; + static const u16 rfseq_htpktgn_lpf_hpc_rev7[] = { 0x77, 0x11, 0x11 }; + static const u16 rfseq_pktgn_lpf_hpc_rev7[] = { 0x11, 0x11 }; + static const u16 rfseq_cckpktgn_lpf_hpc_rev7[] = { 0x11, 0x11 }; u16 ipalvlshift_3p3_war_en = 0; u16 rccal_bcap_val, rccal_scap_val; u16 rccal_tx20_11b_bcap = 0; @@ -24291,13 +24288,13 @@ static void wlc_phy_update_txcal_ladder_nphy(struct brcms_phy *pi, u16 core) u16 bbmult; u16 tblentry; - struct nphy_txiqcal_ladder ladder_lo[] = { + static const struct nphy_txiqcal_ladder ladder_lo[] = { {3, 0}, {4, 0}, {6, 0}, {9, 0}, {13, 0}, {18, 0}, {25, 0}, {25, 1}, {25, 2}, {25, 3}, {25, 4}, {25, 5}, {25, 6}, {25, 7}, {35, 7}, {50, 7}, {71, 7}, {100, 7} }; - struct nphy_txiqcal_ladder ladder_iq[] = { + static const struct nphy_txiqcal_ladder ladder_iq[] = { {3, 0}, {4, 0}, {6, 0}, {9, 0}, {13, 0}, {18, 0}, {25, 0}, {35, 0}, {50, 0}, {71, 0}, {100, 0}, {100, 1}, {100, 2}, {100, 3}, {100, 4}, {100, 5}, {100, 6}, {100, 7} @@ -25773,67 +25770,67 @@ wlc_phy_cal_txiqlo_nphy(struct brcms_phy *pi, struct nphy_txgains target_gain, u16 cal_gain[2]; struct nphy_iqcal_params cal_params[2]; u32 tbl_len; - void *tbl_ptr; + const void *tbl_ptr; bool ladder_updated[2]; u8 mphase_cal_lastphase = 0; int bcmerror = 0; bool phyhang_avoid_state = false; - u16 tbl_tx_iqlo_cal_loft_ladder_20[] = { + static const u16 tbl_tx_iqlo_cal_loft_ladder_20[] = { 0x0300, 0x0500, 0x0700, 0x0900, 0x0d00, 0x1100, 0x1900, 0x1901, 0x1902, 0x1903, 0x1904, 0x1905, 0x1906, 0x1907, 0x2407, 0x3207, 0x4607, 0x6407 }; - u16 tbl_tx_iqlo_cal_iqimb_ladder_20[] = { + static const u16 tbl_tx_iqlo_cal_iqimb_ladder_20[] = { 0x0200, 0x0300, 0x0600, 0x0900, 0x0d00, 0x1100, 0x1900, 0x2400, 0x3200, 0x4600, 0x6400, 0x6401, 0x6402, 0x6403, 0x6404, 0x6405, 0x6406, 0x6407 }; - u16 tbl_tx_iqlo_cal_loft_ladder_40[] = { + static const u16 tbl_tx_iqlo_cal_loft_ladder_40[] = { 0x0200, 0x0300, 0x0400, 0x0700, 0x0900, 0x0c00, 0x1200, 0x1201, 0x1202, 0x1203, 0x1204, 0x1205, 0x1206, 0x1207, 0x1907, 0x2307, 0x3207, 0x4707 }; - u16 tbl_tx_iqlo_cal_iqimb_ladder_40[] = { + static const u16 tbl_tx_iqlo_cal_iqimb_ladder_40[] = { 0x0100, 0x0200, 0x0400, 0x0700, 0x0900, 0x0c00, 0x1200, 0x1900, 0x2300, 0x3200, 0x4700, 0x4701, 0x4702, 0x4703, 0x4704, 0x4705, 0x4706, 0x4707 }; - u16 tbl_tx_iqlo_cal_startcoefs[] = { + static const u16 tbl_tx_iqlo_cal_startcoefs[] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; - u16 tbl_tx_iqlo_cal_cmds_fullcal[] = { + static const u16 tbl_tx_iqlo_cal_cmds_fullcal[] = { 0x8123, 0x8264, 0x8086, 0x8245, 0x8056, 0x9123, 0x9264, 0x9086, 0x9245, 0x9056 }; - u16 tbl_tx_iqlo_cal_cmds_recal[] = { + static const u16 tbl_tx_iqlo_cal_cmds_recal[] = { 0x8101, 0x8253, 0x8053, 0x8234, 0x8034, 0x9101, 0x9253, 0x9053, 0x9234, 0x9034 }; - u16 tbl_tx_iqlo_cal_startcoefs_nphyrev3[] = { + static const u16 tbl_tx_iqlo_cal_startcoefs_nphyrev3[] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; - u16 tbl_tx_iqlo_cal_cmds_fullcal_nphyrev3[] = { + static const u16 tbl_tx_iqlo_cal_cmds_fullcal_nphyrev3[] = { 0x8434, 0x8334, 0x8084, 0x8267, 0x8056, 0x8234, 0x9434, 0x9334, 0x9084, 0x9267, 0x9056, 0x9234 }; - u16 tbl_tx_iqlo_cal_cmds_recal_nphyrev3[] = { + static const u16 tbl_tx_iqlo_cal_cmds_recal_nphyrev3[] = { 0x8423, 0x8323, 0x8073, 0x8256, 0x8045, 0x8223, 0x9423, 0x9323, 0x9073, 0x9256, 0x9045, 0x9223 }; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 3bd6fc1b76d4..33f4d7c7b53a 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -78,6 +78,7 @@ /* NVM offsets (in words) definitions */ enum wkp_nvm_offsets { /* NVM HW-Section offset (in words) definitions */ + SUBSYSTEM_ID = 0x0A, HW_ADDR = 0x15, /* NVM SW-Section offset (in words) definitions */ @@ -262,13 +263,12 @@ static u32 iwl_get_channel_flags(u8 ch_num, int ch_idx, bool is_5ghz, static int iwl_init_channel_map(struct device *dev, const struct iwl_cfg *cfg, struct iwl_nvm_data *data, const __le16 * const nvm_ch_flags, - bool lar_supported) + bool lar_supported, bool no_wide_in_5ghz) { int ch_idx; int n_channels = 0; struct ieee80211_channel *channel; u16 ch_flags; - bool is_5ghz; int num_of_ch, num_2ghz_channels; const u8 *nvm_chan; @@ -283,12 +283,20 @@ static int iwl_init_channel_map(struct device *dev, const struct iwl_cfg *cfg, } for (ch_idx = 0; ch_idx < num_of_ch; ch_idx++) { + bool is_5ghz = (ch_idx >= num_2ghz_channels); + ch_flags = __le16_to_cpup(nvm_ch_flags + ch_idx); - if (ch_idx >= num_2ghz_channels && - !data->sku_cap_band_52GHz_enable) + if (is_5ghz && !data->sku_cap_band_52GHz_enable) continue; + /* workaround to disable wide channels in 5GHz */ + if (no_wide_in_5ghz && is_5ghz) { + ch_flags &= ~(NVM_CHANNEL_40MHZ | + NVM_CHANNEL_80MHZ | + NVM_CHANNEL_160MHZ); + } + if (ch_flags & NVM_CHANNEL_160MHZ) data->vht160_supported = true; @@ -311,8 +319,8 @@ static int iwl_init_channel_map(struct device *dev, const struct iwl_cfg *cfg, n_channels++; channel->hw_value = nvm_chan[ch_idx]; - channel->band = (ch_idx < num_2ghz_channels) ? - NL80211_BAND_2GHZ : NL80211_BAND_5GHZ; + channel->band = is_5ghz ? + NL80211_BAND_5GHZ : NL80211_BAND_2GHZ; channel->center_freq = ieee80211_channel_to_frequency( channel->hw_value, channel->band); @@ -324,7 +332,6 @@ static int iwl_init_channel_map(struct device *dev, const struct iwl_cfg *cfg, * is not used in mvm, and is used for backwards compatibility */ channel->max_power = IWL_DEFAULT_MAX_TX_POWER; - is_5ghz = channel->band == NL80211_BAND_5GHZ; /* don't put limitations in case we're using LAR */ if (!lar_supported) @@ -441,7 +448,8 @@ static void iwl_init_vht_hw_capab(const struct iwl_cfg *cfg, static void iwl_init_sbands(struct device *dev, const struct iwl_cfg *cfg, struct iwl_nvm_data *data, const __le16 *ch_section, - u8 tx_chains, u8 rx_chains, bool lar_supported) + u8 tx_chains, u8 rx_chains, bool lar_supported, + bool no_wide_in_5ghz) { int n_channels; int n_used = 0; @@ -450,12 +458,14 @@ static void iwl_init_sbands(struct device *dev, const struct iwl_cfg *cfg, if (cfg->device_family != IWL_DEVICE_FAMILY_8000) n_channels = iwl_init_channel_map( dev, cfg, data, - &ch_section[NVM_CHANNELS], lar_supported); + &ch_section[NVM_CHANNELS], lar_supported, + no_wide_in_5ghz); else n_channels = iwl_init_channel_map( dev, cfg, data, &ch_section[NVM_CHANNELS_FAMILY_8000], - lar_supported); + lar_supported, + no_wide_in_5ghz); sband = &data->bands[NL80211_BAND_2GHZ]; sband->band = NL80211_BAND_2GHZ; @@ -658,6 +668,39 @@ static int iwl_set_hw_address(struct iwl_trans *trans, return 0; } +static bool +iwl_nvm_no_wide_in_5ghz(struct device *dev, const struct iwl_cfg *cfg, + const __le16 *nvm_hw) +{ + /* + * Workaround a bug in Indonesia SKUs where the regulatory in + * some 7000-family OTPs erroneously allow wide channels in + * 5GHz. To check for Indonesia, we take the SKU value from + * bits 1-4 in the subsystem ID and check if it is either 5 or + * 9. In those cases, we need to force-disable wide channels + * in 5GHz otherwise the FW will throw a sysassert when we try + * to use them. + */ + if (cfg->device_family == IWL_DEVICE_FAMILY_7000) { + /* + * Unlike the other sections in the NVM, the hw + * section uses big-endian. + */ + u16 subsystem_id = be16_to_cpup((const __be16 *)nvm_hw + + SUBSYSTEM_ID); + u8 sku = (subsystem_id & 0x1e) >> 1; + + if (sku == 5 || sku == 9) { + IWL_DEBUG_EEPROM(dev, + "disabling wide channels in 5GHz (0x%0x %d)\n", + subsystem_id, sku); + return true; + } + } + + return false; +} + struct iwl_nvm_data * iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg, const __le16 *nvm_hw, const __le16 *nvm_sw, @@ -668,6 +711,7 @@ iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg, struct device *dev = trans->dev; struct iwl_nvm_data *data; bool lar_enabled; + bool no_wide_in_5ghz = iwl_nvm_no_wide_in_5ghz(dev, cfg, nvm_hw); u32 sku, radio_cfg; u16 lar_config; const __le16 *ch_section; @@ -738,7 +782,7 @@ iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg, } iwl_init_sbands(dev, cfg, data, ch_section, tx_chains, rx_chains, - lar_fw_supported && lar_enabled); + lar_fw_supported && lar_enabled, no_wide_in_5ghz); data->calib_version = 255; return data; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 1db1dc13e988..9789f3c5a785 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -1548,6 +1548,11 @@ static void iwl_mvm_mc_iface_iterator(void *_data, u8 *mac, struct iwl_mvm_mc_iter_data *data = _data; struct iwl_mvm *mvm = data->mvm; struct iwl_mcast_filter_cmd *cmd = mvm->mcast_filter_cmd; + struct iwl_host_cmd hcmd = { + .id = MCAST_FILTER_CMD, + .flags = CMD_ASYNC, + .dataflags[0] = IWL_HCMD_DFL_NOCOPY, + }; int ret, len; /* if we don't have free ports, mcast frames will be dropped */ @@ -1562,7 +1567,10 @@ static void iwl_mvm_mc_iface_iterator(void *_data, u8 *mac, memcpy(cmd->bssid, vif->bss_conf.bssid, ETH_ALEN); len = roundup(sizeof(*cmd) + cmd->count * ETH_ALEN, 4); - ret = iwl_mvm_send_cmd_pdu(mvm, MCAST_FILTER_CMD, CMD_ASYNC, len, cmd); + hcmd.len[0] = len; + hcmd.data[0] = cmd; + + ret = iwl_mvm_send_cmd(mvm, &hcmd); if (ret) IWL_ERR(mvm, "mcast filter cmd error. ret=%d\n", ret); } diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 0fd7d7ed07ce..d2a28a9d3209 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1357,8 +1357,6 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw, txi->control.rates, ARRAY_SIZE(txi->control.rates)); - txi->rate_driver_data[0] = channel; - if (skb->len >= 24 + 8 && ieee80211_is_probe_resp(hdr->frame_control)) { /* fake header transmission time */ @@ -3048,6 +3046,7 @@ static int hwsim_register_received_nl(struct sk_buff *skb_2, static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) { struct hwsim_new_radio_params param = { 0 }; + const char *hwname = NULL; param.reg_strict = info->attrs[HWSIM_ATTR_REG_STRICT_REG]; param.p2p_device = info->attrs[HWSIM_ATTR_SUPPORT_P2P_DEVICE]; @@ -3061,8 +3060,14 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) if (info->attrs[HWSIM_ATTR_NO_VIF]) param.no_vif = true; - if (info->attrs[HWSIM_ATTR_RADIO_NAME]) - param.hwname = nla_data(info->attrs[HWSIM_ATTR_RADIO_NAME]); + if (info->attrs[HWSIM_ATTR_RADIO_NAME]) { + hwname = kasprintf(GFP_KERNEL, "%.*s", + nla_len(info->attrs[HWSIM_ATTR_RADIO_NAME]), + (char *)nla_data(info->attrs[HWSIM_ATTR_RADIO_NAME])); + if (!hwname) + return -ENOMEM; + param.hwname = hwname; + } if (info->attrs[HWSIM_ATTR_USE_CHANCTX]) param.use_chanctx = true; @@ -3090,11 +3095,15 @@ static int hwsim_del_radio_nl(struct sk_buff *msg, struct genl_info *info) s64 idx = -1; const char *hwname = NULL; - if (info->attrs[HWSIM_ATTR_RADIO_ID]) + if (info->attrs[HWSIM_ATTR_RADIO_ID]) { idx = nla_get_u32(info->attrs[HWSIM_ATTR_RADIO_ID]); - else if (info->attrs[HWSIM_ATTR_RADIO_NAME]) - hwname = (void *)nla_data(info->attrs[HWSIM_ATTR_RADIO_NAME]); - else + } else if (info->attrs[HWSIM_ATTR_RADIO_NAME]) { + hwname = kasprintf(GFP_KERNEL, "%.*s", + nla_len(info->attrs[HWSIM_ATTR_RADIO_NAME]), + (char *)nla_data(info->attrs[HWSIM_ATTR_RADIO_NAME])); + if (!hwname) + return -ENOMEM; + } else return -EINVAL; spin_lock_bh(&hwsim_radio_lock); @@ -3103,7 +3112,8 @@ static int hwsim_del_radio_nl(struct sk_buff *msg, struct genl_info *info) if (data->idx != idx) continue; } else { - if (strcmp(hwname, wiphy_name(data->hw->wiphy))) + if (!hwname || + strcmp(hwname, wiphy_name(data->hw->wiphy))) continue; } @@ -3114,10 +3124,12 @@ static int hwsim_del_radio_nl(struct sk_buff *msg, struct genl_info *info) spin_unlock_bh(&hwsim_radio_lock); mac80211_hwsim_del_radio(data, wiphy_name(data->hw->wiphy), info); + kfree(hwname); return 0; } spin_unlock_bh(&hwsim_radio_lock); + kfree(hwname); return -ENODEV; } diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 82d949ede294..4e725d165aa6 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -6316,6 +6316,13 @@ static struct usb_device_id dev_table[] = { .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x7392, 0x7822, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, +/* found in rtl8192eu vendor driver */ +{USB_DEVICE_AND_INTERFACE_INFO(0x2357, 0x0107, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192eu_fops}, +{USB_DEVICE_AND_INTERFACE_INFO(0x2019, 0xab33, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192eu_fops}, +{USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0x818c, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192eu_fops}, #endif { } }; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c index 2cbef9647acc..1281ebe0c30a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c @@ -1128,7 +1128,7 @@ static u8 _rtl8821ae_dbi_read(struct rtl_priv *rtlpriv, u16 addr) } if (0 == tmp) { read_addr = REG_DBI_RDATA + addr % 4; - ret = rtl_read_byte(rtlpriv, read_addr); + ret = rtl_read_word(rtlpriv, read_addr); } return ret; } diff --git a/drivers/net/xen-netback/hash.c b/drivers/net/xen-netback/hash.c index e8c5dddc54ba..3c4c58b9fe76 100644 --- a/drivers/net/xen-netback/hash.c +++ b/drivers/net/xen-netback/hash.c @@ -39,7 +39,7 @@ static void xenvif_add_hash(struct xenvif *vif, const u8 *tag, unsigned long flags; bool found; - new = kmalloc(sizeof(*entry), GFP_KERNEL); + new = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!new) return; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 14eac73e8dbc..54ea90f89b70 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -96,7 +96,7 @@ struct nvme_dev { struct mutex shutdown_lock; bool subsystem; void __iomem *cmb; - dma_addr_t cmb_dma_addr; + pci_bus_addr_t cmb_bus_addr; u64 cmb_size; u32 cmbsz; u32 cmbloc; @@ -1037,7 +1037,7 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) { unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth), dev->ctrl.page_size); - nvmeq->sq_dma_addr = dev->cmb_dma_addr + offset; + nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset; nvmeq->sq_cmds_io = dev->cmb + offset; } else { nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth), @@ -1343,7 +1343,7 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev) resource_size_t bar_size; struct pci_dev *pdev = to_pci_dev(dev->dev); void __iomem *cmb; - dma_addr_t dma_addr; + int bar; dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ); if (!(NVME_CMB_SZ(dev->cmbsz))) @@ -1356,7 +1356,8 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev) szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz)); size = szu * NVME_CMB_SZ(dev->cmbsz); offset = szu * NVME_CMB_OFST(dev->cmbloc); - bar_size = pci_resource_len(pdev, NVME_CMB_BIR(dev->cmbloc)); + bar = NVME_CMB_BIR(dev->cmbloc); + bar_size = pci_resource_len(pdev, bar); if (offset > bar_size) return NULL; @@ -1369,12 +1370,11 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev) if (size > bar_size - offset) size = bar_size - offset; - dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(dev->cmbloc)) + offset; - cmb = ioremap_wc(dma_addr, size); + cmb = ioremap_wc(pci_resource_start(pdev, bar) + offset, size); if (!cmb) return NULL; - dev->cmb_dma_addr = dma_addr; + dev->cmb_bus_addr = pci_bus_address(pdev, bar) + offset; dev->cmb_size = size; return cmb; } diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 286fda4ee100..ab4f8db2a8ca 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -337,8 +337,6 @@ static int __nvme_rdma_init_request(struct nvme_rdma_ctrl *ctrl, struct ib_device *ibdev = dev->dev; int ret; - BUG_ON(queue_idx >= ctrl->queue_count); - ret = nvme_rdma_alloc_qe(ibdev, &req->sqe, sizeof(struct nvme_command), DMA_TO_DEVICE); if (ret) @@ -643,8 +641,22 @@ out_free_queues: static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl) { + struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + unsigned int nr_io_queues; int i, ret; + nr_io_queues = min(opts->nr_io_queues, num_online_cpus()); + ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); + if (ret) + return ret; + + ctrl->queue_count = nr_io_queues + 1; + if (ctrl->queue_count < 2) + return 0; + + dev_info(ctrl->ctrl.device, + "creating %d I/O queues.\n", nr_io_queues); + for (i = 1; i < ctrl->queue_count; i++) { ret = nvme_rdma_init_queue(ctrl, i, ctrl->ctrl.opts->queue_size); @@ -1795,20 +1807,8 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { static int nvme_rdma_create_io_queues(struct nvme_rdma_ctrl *ctrl) { - struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; int ret; - ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); - if (ret) - return ret; - - ctrl->queue_count = opts->nr_io_queues + 1; - if (ctrl->queue_count < 2) - return 0; - - dev_info(ctrl->ctrl.device, - "creating %d I/O queues.\n", opts->nr_io_queues); - ret = nvme_rdma_init_io_queues(ctrl); if (ret) return ret; diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index b57fc6d6e28a..d08dfc8b9ba9 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -586,6 +586,14 @@ static irqreturn_t pciehp_isr(int irq, void *dev_id) events = status & (PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD | PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_CC | PCI_EXP_SLTSTA_DLLSC); + + /* + * If we've already reported a power fault, don't report it again + * until we've done something to handle it. + */ + if (ctrl->power_fault_detected) + events &= ~PCI_EXP_SLTSTA_PFD; + if (!events) return IRQ_NONE; diff --git a/drivers/pci/hotplug/shpchp_hpc.c b/drivers/pci/hotplug/shpchp_hpc.c index de0ea474fb73..e5824c7b7b6b 100644 --- a/drivers/pci/hotplug/shpchp_hpc.c +++ b/drivers/pci/hotplug/shpchp_hpc.c @@ -1062,6 +1062,8 @@ int shpc_init(struct controller *ctrl, struct pci_dev *pdev) if (rc) { ctrl_info(ctrl, "Can't get msi for the hotplug controller\n"); ctrl_info(ctrl, "Use INTx for the hotplug controller\n"); + } else { + pci_set_master(pdev); } rc = request_irq(ctrl->pci_dev->irq, shpc_isr, IRQF_SHARED, diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 1b0786555394..f9f4d1c18eb2 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -527,7 +527,7 @@ static ssize_t driver_override_store(struct device *dev, const char *buf, size_t count) { struct pci_dev *pdev = to_pci_dev(dev); - char *driver_override, *old = pdev->driver_override, *cp; + char *driver_override, *old, *cp; /* We need to keep extra room for a newline */ if (count >= (PAGE_SIZE - 1)) @@ -541,12 +541,15 @@ static ssize_t driver_override_store(struct device *dev, if (cp) *cp = '\0'; + device_lock(dev); + old = pdev->driver_override; if (strlen(driver_override)) { pdev->driver_override = driver_override; } else { kfree(driver_override); pdev->driver_override = NULL; } + device_unlock(dev); kfree(old); @@ -557,8 +560,12 @@ static ssize_t driver_override_show(struct device *dev, struct device_attribute *attr, char *buf) { struct pci_dev *pdev = to_pci_dev(dev); + ssize_t len; - return snprintf(buf, PAGE_SIZE, "%s\n", pdev->driver_override); + device_lock(dev); + len = snprintf(buf, PAGE_SIZE, "%s\n", pdev->driver_override); + device_unlock(dev); + return len; } static DEVICE_ATTR_RW(driver_override); diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig index 0e75d94972ba..671610c989b6 100644 --- a/drivers/pinctrl/Kconfig +++ b/drivers/pinctrl/Kconfig @@ -82,6 +82,7 @@ config PINCTRL_AMD tristate "AMD GPIO pin control" depends on GPIOLIB select GPIOLIB_IRQCHIP + select PINMUX select PINCONF select GENERIC_PINCONF help diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index c9a146948192..a5b7bd3c9bac 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -32,6 +32,7 @@ #include #include +#include "core.h" #include "pinctrl-utils.h" #include "pinctrl-amd.h" @@ -712,6 +713,69 @@ static const struct pinconf_ops amd_pinconf_ops = { .pin_config_group_set = amd_pinconf_group_set, }; +#ifdef CONFIG_PM_SLEEP +static bool amd_gpio_should_save(struct amd_gpio *gpio_dev, unsigned int pin) +{ + const struct pin_desc *pd = pin_desc_get(gpio_dev->pctrl, pin); + + if (!pd) + return false; + + /* + * Only restore the pin if it is actually in use by the kernel (or + * by userspace). + */ + if (pd->mux_owner || pd->gpio_owner || + gpiochip_line_is_irq(&gpio_dev->gc, pin)) + return true; + + return false; +} + +int amd_gpio_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct amd_gpio *gpio_dev = platform_get_drvdata(pdev); + struct pinctrl_desc *desc = gpio_dev->pctrl->desc; + int i; + + for (i = 0; i < desc->npins; i++) { + int pin = desc->pins[i].number; + + if (!amd_gpio_should_save(gpio_dev, pin)) + continue; + + gpio_dev->saved_regs[i] = readl(gpio_dev->base + pin*4); + } + + return 0; +} + +int amd_gpio_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct amd_gpio *gpio_dev = platform_get_drvdata(pdev); + struct pinctrl_desc *desc = gpio_dev->pctrl->desc; + int i; + + for (i = 0; i < desc->npins; i++) { + int pin = desc->pins[i].number; + + if (!amd_gpio_should_save(gpio_dev, pin)) + continue; + + writel(gpio_dev->saved_regs[i], gpio_dev->base + pin*4); + } + + return 0; +} + +static const struct dev_pm_ops amd_gpio_pm_ops = { + SET_LATE_SYSTEM_SLEEP_PM_OPS(amd_gpio_suspend, + amd_gpio_resume) +}; +#endif + static struct pinctrl_desc amd_pinctrl_desc = { .pins = kerncz_pins, .npins = ARRAY_SIZE(kerncz_pins), @@ -751,6 +815,14 @@ static int amd_gpio_probe(struct platform_device *pdev) return -EINVAL; } +#ifdef CONFIG_PM_SLEEP + gpio_dev->saved_regs = devm_kcalloc(&pdev->dev, amd_pinctrl_desc.npins, + sizeof(*gpio_dev->saved_regs), + GFP_KERNEL); + if (!gpio_dev->saved_regs) + return -ENOMEM; +#endif + gpio_dev->pdev = pdev; gpio_dev->gc.direction_input = amd_gpio_direction_input; gpio_dev->gc.direction_output = amd_gpio_direction_output; @@ -839,6 +911,9 @@ static struct platform_driver amd_gpio_driver = { .driver = { .name = "amd_gpio", .acpi_match_table = ACPI_PTR(amd_gpio_acpi_match), +#ifdef CONFIG_PM_SLEEP + .pm = &amd_gpio_pm_ops, +#endif }, .probe = amd_gpio_probe, .remove = amd_gpio_remove, diff --git a/drivers/pinctrl/pinctrl-amd.h b/drivers/pinctrl/pinctrl-amd.h index 7bfea47dbb47..e8bbb20779d0 100644 --- a/drivers/pinctrl/pinctrl-amd.h +++ b/drivers/pinctrl/pinctrl-amd.h @@ -95,6 +95,7 @@ struct amd_gpio { struct gpio_chip gc; struct resource *res; struct platform_device *pdev; + u32 *saved_regs; }; /* KERNCZ configuration*/ diff --git a/drivers/power/supply/axp288_fuel_gauge.c b/drivers/power/supply/axp288_fuel_gauge.c index 5bdde692f724..f62f9dfea984 100644 --- a/drivers/power/supply/axp288_fuel_gauge.c +++ b/drivers/power/supply/axp288_fuel_gauge.c @@ -169,8 +169,10 @@ static int fuel_gauge_reg_readb(struct axp288_fg_info *info, int reg) break; } - if (ret < 0) + if (ret < 0) { dev_err(&info->pdev->dev, "axp288 reg read err:%d\n", ret); + return ret; + } return val; } diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c index 9d19b9a62011..315a4be8dc1e 100644 --- a/drivers/rapidio/devices/tsi721.c +++ b/drivers/rapidio/devices/tsi721.c @@ -37,8 +37,8 @@ #include "tsi721.h" #ifdef DEBUG -u32 dbg_level; -module_param(dbg_level, uint, S_IWUSR | S_IRUGO); +u32 tsi_dbg_level; +module_param_named(dbg_level, tsi_dbg_level, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(dbg_level, "Debugging output level (default 0 = none)"); #endif diff --git a/drivers/rapidio/devices/tsi721.h b/drivers/rapidio/devices/tsi721.h index 5941437cbdd1..957eadc58150 100644 --- a/drivers/rapidio/devices/tsi721.h +++ b/drivers/rapidio/devices/tsi721.h @@ -40,11 +40,11 @@ enum { }; #ifdef DEBUG -extern u32 dbg_level; +extern u32 tsi_dbg_level; #define tsi_debug(level, dev, fmt, arg...) \ do { \ - if (DBG_##level & dbg_level) \ + if (DBG_##level & tsi_dbg_level) \ dev_dbg(dev, "%s: " fmt "\n", __func__, ##arg); \ } while (0) #else diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 9faccfceb53c..9403245503de 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -4507,6 +4507,16 @@ static int __init regulator_init_complete(void) if (of_have_populated_dt()) has_full_constraints = true; + /* + * Regulators may had failed to resolve their input supplies + * when were registered, either because the input supply was + * not registered yet or because its parent device was not + * bound yet. So attempt to resolve the input supplies for + * pending regulators before trying to disable unused ones. + */ + class_for_each_device(®ulator_class, NULL, NULL, + regulator_register_resolve_supply); + /* If we have a full configuration then disable any regulators * we have permission to change the status for and which are * not in use or always_on. This is effectively the default diff --git a/drivers/reset/reset-ti-syscon.c b/drivers/reset/reset-ti-syscon.c index 47f0ffd3b013..1799fd423901 100644 --- a/drivers/reset/reset-ti-syscon.c +++ b/drivers/reset/reset-ti-syscon.c @@ -154,8 +154,8 @@ static int ti_syscon_reset_status(struct reset_controller_dev *rcdev, if (ret) return ret; - return (reset_state & BIT(control->status_bit)) && - (control->flags & STATUS_SET); + return !(reset_state & BIT(control->status_bit)) == + !(control->flags & STATUS_SET); } static struct reset_control_ops ti_syscon_reset_ops = { diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index d5bf36ec8a75..34367d172961 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -3,7 +3,7 @@ * * Debug traces for zfcp. * - * Copyright IBM Corp. 2002, 2016 + * Copyright IBM Corp. 2002, 2017 */ #define KMSG_COMPONENT "zfcp" @@ -447,6 +447,7 @@ static u16 zfcp_dbf_san_res_cap_len_if_gpn_ft(char *tag, struct fc_ct_hdr *reqh = sg_virt(ct_els->req); struct fc_ns_gid_ft *reqn = (struct fc_ns_gid_ft *)(reqh + 1); struct scatterlist *resp_entry = ct_els->resp; + struct fc_ct_hdr *resph; struct fc_gpn_ft_resp *acc; int max_entries, x, last = 0; @@ -473,6 +474,13 @@ static u16 zfcp_dbf_san_res_cap_len_if_gpn_ft(char *tag, return len; /* not GPN_FT response so do not cap */ acc = sg_virt(resp_entry); + + /* cap all but accept CT responses to at least the CT header */ + resph = (struct fc_ct_hdr *)acc; + if ((ct_els->status) || + (resph->ct_cmd != cpu_to_be16(FC_FS_ACC))) + return max(FC_CT_HDR_LEN, ZFCP_DBF_SAN_MAX_PAYLOAD); + max_entries = (reqh->ct_mr_size * 4 / sizeof(struct fc_gpn_ft_resp)) + 1 /* zfcp_fc_scan_ports: bytes correct, entries off-by-one * to account for header as 1st pseudo "entry" */; @@ -555,8 +563,8 @@ void zfcp_dbf_scsi(char *tag, int level, struct scsi_cmnd *sc, rec->scsi_retries = sc->retries; rec->scsi_allowed = sc->allowed; rec->scsi_id = sc->device->id; - /* struct zfcp_dbf_scsi needs to be updated to handle 64bit LUNs */ rec->scsi_lun = (u32)sc->device->lun; + rec->scsi_lun_64_hi = (u32)(sc->device->lun >> 32); rec->host_scribble = (unsigned long)sc->host_scribble; memcpy(rec->scsi_opcode, sc->cmnd, @@ -564,19 +572,32 @@ void zfcp_dbf_scsi(char *tag, int level, struct scsi_cmnd *sc, if (fsf) { rec->fsf_req_id = fsf->req_id; + rec->pl_len = FCP_RESP_WITH_EXT; fcp_rsp = (struct fcp_resp_with_ext *) &(fsf->qtcb->bottom.io.fcp_rsp); + /* mandatory parts of FCP_RSP IU in this SCSI record */ memcpy(&rec->fcp_rsp, fcp_rsp, FCP_RESP_WITH_EXT); if (fcp_rsp->resp.fr_flags & FCP_RSP_LEN_VAL) { fcp_rsp_info = (struct fcp_resp_rsp_info *) &fcp_rsp[1]; rec->fcp_rsp_info = fcp_rsp_info->rsp_code; + rec->pl_len += be32_to_cpu(fcp_rsp->ext.fr_rsp_len); } if (fcp_rsp->resp.fr_flags & FCP_SNS_LEN_VAL) { - rec->pl_len = min((u16)SCSI_SENSE_BUFFERSIZE, - (u16)ZFCP_DBF_PAY_MAX_REC); - zfcp_dbf_pl_write(dbf, sc->sense_buffer, rec->pl_len, - "fcp_sns", fsf->req_id); + rec->pl_len += be32_to_cpu(fcp_rsp->ext.fr_sns_len); } + /* complete FCP_RSP IU in associated PAYload record + * but only if there are optional parts + */ + if (fcp_rsp->resp.fr_flags != 0) + zfcp_dbf_pl_write( + dbf, fcp_rsp, + /* at least one full PAY record + * but not beyond hardware response field + */ + min_t(u16, max_t(u16, rec->pl_len, + ZFCP_DBF_PAY_MAX_REC), + FSF_FCP_RSP_SIZE), + "fcp_riu", fsf->req_id); } debug_event(dbf->scsi, level, rec, sizeof(*rec)); diff --git a/drivers/s390/scsi/zfcp_dbf.h b/drivers/s390/scsi/zfcp_dbf.h index db186d44cfaf..b60667c145fd 100644 --- a/drivers/s390/scsi/zfcp_dbf.h +++ b/drivers/s390/scsi/zfcp_dbf.h @@ -2,7 +2,7 @@ * zfcp device driver * debug feature declarations * - * Copyright IBM Corp. 2008, 2016 + * Copyright IBM Corp. 2008, 2017 */ #ifndef ZFCP_DBF_H @@ -204,7 +204,7 @@ enum zfcp_dbf_scsi_id { * @id: unique number of recovery record type * @tag: identifier string specifying the location of initiation * @scsi_id: scsi device id - * @scsi_lun: scsi device logical unit number + * @scsi_lun: scsi device logical unit number, low part of 64 bit, old 32 bit * @scsi_result: scsi result * @scsi_retries: current retry number of scsi request * @scsi_allowed: allowed retries @@ -214,6 +214,7 @@ enum zfcp_dbf_scsi_id { * @host_scribble: LLD specific data attached to SCSI request * @pl_len: length of paload stored as zfcp_dbf_pay * @fsf_rsp: response for fsf request + * @scsi_lun_64_hi: scsi device logical unit number, high part of 64 bit */ struct zfcp_dbf_scsi { u8 id; @@ -230,6 +231,7 @@ struct zfcp_dbf_scsi { u64 host_scribble; u16 pl_len; struct fcp_resp_with_ext fcp_rsp; + u32 scsi_lun_64_hi; } __packed; /** @@ -323,7 +325,11 @@ void zfcp_dbf_hba_fsf_response(struct zfcp_fsf_req *req) { struct fsf_qtcb *qtcb = req->qtcb; - if ((qtcb->prefix.prot_status != FSF_PROT_GOOD) && + if (unlikely(req->status & (ZFCP_STATUS_FSFREQ_DISMISSED | + ZFCP_STATUS_FSFREQ_ERROR))) { + zfcp_dbf_hba_fsf_resp("fs_rerr", 3, req); + + } else if ((qtcb->prefix.prot_status != FSF_PROT_GOOD) && (qtcb->prefix.prot_status != FSF_PROT_FSF_STATUS_PRESENTED)) { zfcp_dbf_hba_fsf_resp("fs_perr", 1, req); @@ -401,7 +407,8 @@ void zfcp_dbf_scsi_abort(char *tag, struct scsi_cmnd *scmd, * @flag: indicates type of reset (Target Reset, Logical Unit Reset) */ static inline -void zfcp_dbf_scsi_devreset(char *tag, struct scsi_cmnd *scmnd, u8 flag) +void zfcp_dbf_scsi_devreset(char *tag, struct scsi_cmnd *scmnd, u8 flag, + struct zfcp_fsf_req *fsf_req) { char tmp_tag[ZFCP_DBF_TAG_LEN]; @@ -411,7 +418,7 @@ void zfcp_dbf_scsi_devreset(char *tag, struct scsi_cmnd *scmnd, u8 flag) memcpy(tmp_tag, "lr_", 3); memcpy(&tmp_tag[3], tag, 4); - _zfcp_dbf_scsi(tmp_tag, 1, scmnd, NULL); + _zfcp_dbf_scsi(tmp_tag, 1, scmnd, fsf_req); } /** diff --git a/drivers/s390/scsi/zfcp_fc.h b/drivers/s390/scsi/zfcp_fc.h index df2b541c8287..a2275825186f 100644 --- a/drivers/s390/scsi/zfcp_fc.h +++ b/drivers/s390/scsi/zfcp_fc.h @@ -4,7 +4,7 @@ * Fibre Channel related definitions and inline functions for the zfcp * device driver * - * Copyright IBM Corp. 2009 + * Copyright IBM Corp. 2009, 2017 */ #ifndef ZFCP_FC_H @@ -279,6 +279,10 @@ void zfcp_fc_eval_fcp_rsp(struct fcp_resp_with_ext *fcp_rsp, !(rsp_flags & FCP_SNS_LEN_VAL) && fcp_rsp->resp.fr_status == SAM_STAT_GOOD) set_host_byte(scsi, DID_ERROR); + } else if (unlikely(rsp_flags & FCP_RESID_OVER)) { + /* FCP_DL was not sufficient for SCSI data length */ + if (fcp_rsp->resp.fr_status == SAM_STAT_GOOD) + set_host_byte(scsi, DID_ERROR); } } diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 27ff38f839fc..1964391db904 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -928,8 +928,8 @@ static void zfcp_fsf_send_ct_handler(struct zfcp_fsf_req *req) switch (header->fsf_status) { case FSF_GOOD: - zfcp_dbf_san_res("fsscth2", req); ct->status = 0; + zfcp_dbf_san_res("fsscth2", req); break; case FSF_SERVICE_CLASS_NOT_SUPPORTED: zfcp_fsf_class_not_supp(req); @@ -1109,8 +1109,8 @@ static void zfcp_fsf_send_els_handler(struct zfcp_fsf_req *req) switch (header->fsf_status) { case FSF_GOOD: - zfcp_dbf_san_res("fsselh1", req); send_els->status = 0; + zfcp_dbf_san_res("fsselh1", req); break; case FSF_SERVICE_CLASS_NOT_SUPPORTED: zfcp_fsf_class_not_supp(req); @@ -2258,7 +2258,8 @@ int zfcp_fsf_fcp_cmnd(struct scsi_cmnd *scsi_cmnd) fcp_cmnd = (struct fcp_cmnd *) &req->qtcb->bottom.io.fcp_cmnd; zfcp_fc_scsi_to_fcp(fcp_cmnd, scsi_cmnd, 0); - if (scsi_prot_sg_count(scsi_cmnd)) { + if ((scsi_get_prot_op(scsi_cmnd) != SCSI_PROT_NORMAL) && + scsi_prot_sg_count(scsi_cmnd)) { zfcp_qdio_set_data_div(qdio, &req->qdio_req, scsi_prot_sg_count(scsi_cmnd)); retval = zfcp_qdio_sbals_from_sg(qdio, &req->qdio_req, diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 07ffdbb5107f..9bd9b9a29dfc 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -3,7 +3,7 @@ * * Interface to Linux SCSI midlayer. * - * Copyright IBM Corp. 2002, 2016 + * Copyright IBM Corp. 2002, 2017 */ #define KMSG_COMPONENT "zfcp" @@ -273,25 +273,29 @@ static int zfcp_task_mgmt_function(struct scsi_cmnd *scpnt, u8 tm_flags) zfcp_erp_wait(adapter); ret = fc_block_scsi_eh(scpnt); - if (ret) + if (ret) { + zfcp_dbf_scsi_devreset("fiof", scpnt, tm_flags, NULL); return ret; + } if (!(atomic_read(&adapter->status) & ZFCP_STATUS_COMMON_RUNNING)) { - zfcp_dbf_scsi_devreset("nres", scpnt, tm_flags); + zfcp_dbf_scsi_devreset("nres", scpnt, tm_flags, NULL); return SUCCESS; } } - if (!fsf_req) + if (!fsf_req) { + zfcp_dbf_scsi_devreset("reqf", scpnt, tm_flags, NULL); return FAILED; + } wait_for_completion(&fsf_req->completion); if (fsf_req->status & ZFCP_STATUS_FSFREQ_TMFUNCFAILED) { - zfcp_dbf_scsi_devreset("fail", scpnt, tm_flags); + zfcp_dbf_scsi_devreset("fail", scpnt, tm_flags, fsf_req); retval = FAILED; } else { - zfcp_dbf_scsi_devreset("okay", scpnt, tm_flags); + zfcp_dbf_scsi_devreset("okay", scpnt, tm_flags, fsf_req); zfcp_scsi_forget_cmnds(zfcp_sdev, tm_flags); } diff --git a/drivers/scsi/be2iscsi/be_iscsi.c b/drivers/scsi/be2iscsi/be_iscsi.c index ba258217614e..963c732a3d24 100644 --- a/drivers/scsi/be2iscsi/be_iscsi.c +++ b/drivers/scsi/be2iscsi/be_iscsi.c @@ -165,33 +165,6 @@ beiscsi_conn_create(struct iscsi_cls_session *cls_session, u32 cid) return cls_conn; } -/** - * beiscsi_bindconn_cid - Bind the beiscsi_conn with phba connection table - * @beiscsi_conn: The pointer to beiscsi_conn structure - * @phba: The phba instance - * @cid: The cid to free - */ -static int beiscsi_bindconn_cid(struct beiscsi_hba *phba, - struct beiscsi_conn *beiscsi_conn, - unsigned int cid) -{ - uint16_t cri_index = BE_GET_CRI_FROM_CID(cid); - - if (phba->conn_table[cri_index]) { - beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_CONFIG, - "BS_%d : Connection table already occupied. Detected clash\n"); - - return -EINVAL; - } else { - beiscsi_log(phba, KERN_INFO, BEISCSI_LOG_CONFIG, - "BS_%d : phba->conn_table[%d]=%p(beiscsi_conn)\n", - cri_index, beiscsi_conn); - - phba->conn_table[cri_index] = beiscsi_conn; - } - return 0; -} - /** * beiscsi_conn_bind - Binds iscsi session/connection with TCP connection * @cls_session: pointer to iscsi cls session @@ -212,6 +185,7 @@ int beiscsi_conn_bind(struct iscsi_cls_session *cls_session, struct hwi_wrb_context *pwrb_context; struct beiscsi_endpoint *beiscsi_ep; struct iscsi_endpoint *ep; + uint16_t cri_index; ep = iscsi_lookup_endpoint(transport_fd); if (!ep) @@ -229,20 +203,34 @@ int beiscsi_conn_bind(struct iscsi_cls_session *cls_session, return -EEXIST; } - - pwrb_context = &phwi_ctrlr->wrb_context[BE_GET_CRI_FROM_CID( - beiscsi_ep->ep_cid)]; + cri_index = BE_GET_CRI_FROM_CID(beiscsi_ep->ep_cid); + if (phba->conn_table[cri_index]) { + if (beiscsi_conn != phba->conn_table[cri_index] || + beiscsi_ep != phba->conn_table[cri_index]->ep) { + __beiscsi_log(phba, KERN_ERR, + "BS_%d : conn_table not empty at %u: cid %u conn %p:%p\n", + cri_index, + beiscsi_ep->ep_cid, + beiscsi_conn, + phba->conn_table[cri_index]); + return -EINVAL; + } + } beiscsi_conn->beiscsi_conn_cid = beiscsi_ep->ep_cid; beiscsi_conn->ep = beiscsi_ep; beiscsi_ep->conn = beiscsi_conn; + /** + * Each connection is associated with a WRBQ kept in wrb_context. + * Store doorbell offset for transmit path. + */ + pwrb_context = &phwi_ctrlr->wrb_context[cri_index]; beiscsi_conn->doorbell_offset = pwrb_context->doorbell_offset; - beiscsi_log(phba, KERN_INFO, BEISCSI_LOG_CONFIG, - "BS_%d : beiscsi_conn=%p conn=%p ep_cid=%d\n", - beiscsi_conn, conn, beiscsi_ep->ep_cid); - - return beiscsi_bindconn_cid(phba, beiscsi_conn, beiscsi_ep->ep_cid); + "BS_%d : cid %d phba->conn_table[%u]=%p\n", + beiscsi_ep->ep_cid, cri_index, beiscsi_conn); + phba->conn_table[cri_index] = beiscsi_conn; + return 0; } static int beiscsi_iface_create_ipv4(struct beiscsi_hba *phba) @@ -973,9 +961,9 @@ int beiscsi_conn_start(struct iscsi_cls_conn *cls_conn) */ static int beiscsi_get_cid(struct beiscsi_hba *phba) { - unsigned short cid = 0xFFFF, cid_from_ulp; - struct ulp_cid_info *cid_info = NULL; uint16_t cid_avlbl_ulp0, cid_avlbl_ulp1; + unsigned short cid, cid_from_ulp; + struct ulp_cid_info *cid_info; /* Find the ULP which has more CID available */ cid_avlbl_ulp0 = (phba->cid_array_info[BEISCSI_ULP0]) ? @@ -984,20 +972,27 @@ static int beiscsi_get_cid(struct beiscsi_hba *phba) BEISCSI_ULP1_AVLBL_CID(phba) : 0; cid_from_ulp = (cid_avlbl_ulp0 > cid_avlbl_ulp1) ? BEISCSI_ULP0 : BEISCSI_ULP1; + /** + * If iSCSI protocol is loaded only on ULP 0, and when cid_avlbl_ulp + * is ZERO for both, ULP 1 is returned. + * Check if ULP is loaded before getting new CID. + */ + if (!test_bit(cid_from_ulp, (void *)&phba->fw_config.ulp_supported)) + return BE_INVALID_CID; - if (test_bit(cid_from_ulp, (void *)&phba->fw_config.ulp_supported)) { - cid_info = phba->cid_array_info[cid_from_ulp]; - if (!cid_info->avlbl_cids) - return cid; - - cid = cid_info->cid_array[cid_info->cid_alloc++]; - - if (cid_info->cid_alloc == BEISCSI_GET_CID_COUNT( - phba, cid_from_ulp)) - cid_info->cid_alloc = 0; - - cid_info->avlbl_cids--; + cid_info = phba->cid_array_info[cid_from_ulp]; + cid = cid_info->cid_array[cid_info->cid_alloc]; + if (!cid_info->avlbl_cids || cid == BE_INVALID_CID) { + __beiscsi_log(phba, KERN_ERR, + "BS_%d : failed to get cid: available %u:%u\n", + cid_info->avlbl_cids, cid_info->cid_free); + return BE_INVALID_CID; } + /* empty the slot */ + cid_info->cid_array[cid_info->cid_alloc++] = BE_INVALID_CID; + if (cid_info->cid_alloc == BEISCSI_GET_CID_COUNT(phba, cid_from_ulp)) + cid_info->cid_alloc = 0; + cid_info->avlbl_cids--; return cid; } @@ -1008,22 +1003,28 @@ static int beiscsi_get_cid(struct beiscsi_hba *phba) */ static void beiscsi_put_cid(struct beiscsi_hba *phba, unsigned short cid) { - uint16_t cid_post_ulp; - struct hwi_controller *phwi_ctrlr; - struct hwi_wrb_context *pwrb_context; - struct ulp_cid_info *cid_info = NULL; uint16_t cri_index = BE_GET_CRI_FROM_CID(cid); + struct hwi_wrb_context *pwrb_context; + struct hwi_controller *phwi_ctrlr; + struct ulp_cid_info *cid_info; + uint16_t cid_post_ulp; phwi_ctrlr = phba->phwi_ctrlr; pwrb_context = &phwi_ctrlr->wrb_context[cri_index]; cid_post_ulp = pwrb_context->ulp_num; cid_info = phba->cid_array_info[cid_post_ulp]; - cid_info->avlbl_cids++; - + /* fill only in empty slot */ + if (cid_info->cid_array[cid_info->cid_free] != BE_INVALID_CID) { + __beiscsi_log(phba, KERN_ERR, + "BS_%d : failed to put cid %u: available %u:%u\n", + cid, cid_info->avlbl_cids, cid_info->cid_free); + return; + } cid_info->cid_array[cid_info->cid_free++] = cid; if (cid_info->cid_free == BEISCSI_GET_CID_COUNT(phba, cid_post_ulp)) cid_info->cid_free = 0; + cid_info->avlbl_cids++; } /** @@ -1037,8 +1038,8 @@ static void beiscsi_free_ep(struct beiscsi_endpoint *beiscsi_ep) beiscsi_put_cid(phba, beiscsi_ep->ep_cid); beiscsi_ep->phba = NULL; - phba->ep_array[BE_GET_CRI_FROM_CID - (beiscsi_ep->ep_cid)] = NULL; + /* clear this to track freeing in beiscsi_ep_disconnect */ + phba->ep_array[BE_GET_CRI_FROM_CID(beiscsi_ep->ep_cid)] = NULL; /** * Check if any connection resource allocated by driver @@ -1049,6 +1050,11 @@ static void beiscsi_free_ep(struct beiscsi_endpoint *beiscsi_ep) return; beiscsi_conn = beiscsi_ep->conn; + /** + * Break ep->conn link here so that completions after + * this are ignored. + */ + beiscsi_ep->conn = NULL; if (beiscsi_conn->login_in_progress) { beiscsi_free_mgmt_task_handles(beiscsi_conn, beiscsi_conn->task); @@ -1079,7 +1085,7 @@ static int beiscsi_open_conn(struct iscsi_endpoint *ep, "BS_%d : In beiscsi_open_conn\n"); beiscsi_ep->ep_cid = beiscsi_get_cid(phba); - if (beiscsi_ep->ep_cid == 0xFFFF) { + if (beiscsi_ep->ep_cid == BE_INVALID_CID) { beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_CONFIG, "BS_%d : No free cid available\n"); return ret; @@ -1284,26 +1290,6 @@ static int beiscsi_close_conn(struct beiscsi_endpoint *beiscsi_ep, int flag) return ret; } -/** - * beiscsi_unbind_conn_to_cid - Unbind the beiscsi_conn from phba conn table - * @phba: The phba instance - * @cid: The cid to free - */ -static int beiscsi_unbind_conn_to_cid(struct beiscsi_hba *phba, - unsigned int cid) -{ - uint16_t cri_index = BE_GET_CRI_FROM_CID(cid); - - if (phba->conn_table[cri_index]) - phba->conn_table[cri_index] = NULL; - else { - beiscsi_log(phba, KERN_INFO, BEISCSI_LOG_CONFIG, - "BS_%d : Connection table Not occupied.\n"); - return -EINVAL; - } - return 0; -} - /** * beiscsi_ep_disconnect - Tears down the TCP connection * @ep: endpoint to be used @@ -1318,13 +1304,23 @@ void beiscsi_ep_disconnect(struct iscsi_endpoint *ep) unsigned int tag; uint8_t mgmt_invalidate_flag, tcp_upload_flag; unsigned short savecfg_flag = CMD_ISCSI_SESSION_SAVE_CFG_ON_FLASH; + uint16_t cri_index; beiscsi_ep = ep->dd_data; phba = beiscsi_ep->phba; beiscsi_log(phba, KERN_INFO, BEISCSI_LOG_CONFIG, - "BS_%d : In beiscsi_ep_disconnect for ep_cid = %d\n", + "BS_%d : In beiscsi_ep_disconnect for ep_cid = %u\n", beiscsi_ep->ep_cid); + cri_index = BE_GET_CRI_FROM_CID(beiscsi_ep->ep_cid); + if (!phba->ep_array[cri_index]) { + __beiscsi_log(phba, KERN_ERR, + "BS_%d : ep_array at %u cid %u empty\n", + cri_index, + beiscsi_ep->ep_cid); + return; + } + if (beiscsi_ep->conn) { beiscsi_conn = beiscsi_ep->conn; iscsi_suspend_queue(beiscsi_conn->conn); @@ -1356,7 +1352,12 @@ void beiscsi_ep_disconnect(struct iscsi_endpoint *ep) free_ep: msleep(BEISCSI_LOGOUT_SYNC_DELAY); beiscsi_free_ep(beiscsi_ep); - beiscsi_unbind_conn_to_cid(phba, beiscsi_ep->ep_cid); + if (!phba->conn_table[cri_index]) + __beiscsi_log(phba, KERN_ERR, + "BS_%d : conn_table empty at %u: cid %u\n", + cri_index, + beiscsi_ep->ep_cid); + phba->conn_table[cri_index] = NULL; iscsi_destroy_endpoint(beiscsi_ep->openiscsi_ep); } diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index d9239c2d49b1..741cc96379cb 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -4085,9 +4085,10 @@ static int hba_setup_cid_tbls(struct beiscsi_hba *phba) } /* Allocate memory for CID array */ - ptr_cid_info->cid_array = kzalloc(sizeof(void *) * - BEISCSI_GET_CID_COUNT(phba, - ulp_num), GFP_KERNEL); + ptr_cid_info->cid_array = + kcalloc(BEISCSI_GET_CID_COUNT(phba, ulp_num), + sizeof(*ptr_cid_info->cid_array), + GFP_KERNEL); if (!ptr_cid_info->cid_array) { beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_INIT, "BM_%d : Failed to allocate memory" diff --git a/drivers/scsi/be2iscsi/be_main.h b/drivers/scsi/be2iscsi/be_main.h index 6376657e45f7..02d00ab6d981 100644 --- a/drivers/scsi/be2iscsi/be_main.h +++ b/drivers/scsi/be2iscsi/be_main.h @@ -358,6 +358,7 @@ struct beiscsi_hba { unsigned int age; struct list_head hba_queue; #define BE_MAX_SESSION 2048 +#define BE_INVALID_CID 0xffff #define BE_SET_CID_TO_CRI(cri_index, cid) \ (phba->cid_to_cri_map[cid] = cri_index) #define BE_GET_CRI_FROM_CID(cid) (phba->cid_to_cri_map[cid]) diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c index 375d81850f15..d5f6fbfa17bf 100644 --- a/drivers/scsi/device_handler/scsi_dh_emc.c +++ b/drivers/scsi/device_handler/scsi_dh_emc.c @@ -461,7 +461,7 @@ static int clariion_prep_fn(struct scsi_device *sdev, struct request *req) static int clariion_std_inquiry(struct scsi_device *sdev, struct clariion_dh_data *csdev) { - int err; + int err = SCSI_DH_OK; char *sp_model; err = send_inquiry_cmd(sdev, 0, csdev); diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index d8b1fbd4c8aa..35cbd36f8d3b 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -1901,9 +1901,12 @@ static void megasas_complete_outstanding_ioctls(struct megasas_instance *instanc if (cmd_fusion->sync_cmd_idx != (u32)ULONG_MAX) { cmd_mfi = instance->cmd_list[cmd_fusion->sync_cmd_idx]; if (cmd_mfi->sync_cmd && - cmd_mfi->frame->hdr.cmd != MFI_CMD_ABORT) + (cmd_mfi->frame->hdr.cmd != MFI_CMD_ABORT)) { + cmd_mfi->frame->hdr.cmd_status = + MFI_STAT_WRONG_STATE; megasas_complete_cmd(instance, cmd_mfi, DID_OK); + } } } } else { @@ -5290,7 +5293,8 @@ static int megasas_init_fw(struct megasas_instance *instance) instance->throttlequeuedepth = MEGASAS_THROTTLE_QUEUE_DEPTH; - if (resetwaittime > MEGASAS_RESET_WAIT_TIME) + if ((resetwaittime < 1) || + (resetwaittime > MEGASAS_RESET_WAIT_TIME)) resetwaittime = MEGASAS_RESET_WAIT_TIME; if ((scmd_timeout < 10) || (scmd_timeout > MEGASAS_DEFAULT_CMD_TIMEOUT)) @@ -5459,6 +5463,14 @@ megasas_register_aen(struct megasas_instance *instance, u32 seq_num, prev_aen.word = le32_to_cpu(instance->aen_cmd->frame->dcmd.mbox.w[1]); + if ((curr_aen.members.class < MFI_EVT_CLASS_DEBUG) || + (curr_aen.members.class > MFI_EVT_CLASS_DEAD)) { + dev_info(&instance->pdev->dev, + "%s %d out of range class %d send by application\n", + __func__, __LINE__, curr_aen.members.class); + return 0; + } + /* * A class whose enum value is smaller is inclusive of all * higher values. If a PROGRESS (= -1) was previously diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 8c4641b518b5..9a34afcb1c4c 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -318,6 +318,8 @@ qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj, return -EINVAL; if (start > ha->optrom_size) return -EINVAL; + if (size > ha->optrom_size - start) + size = ha->optrom_size - start; mutex_lock(&ha->optrom_mutex); switch (val) { @@ -343,8 +345,7 @@ qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj, } ha->optrom_region_start = start; - ha->optrom_region_size = start + size > ha->optrom_size ? - ha->optrom_size - start : size; + ha->optrom_region_size = start + size; ha->optrom_state = QLA_SREADING; ha->optrom_buffer = vmalloc(ha->optrom_region_size); @@ -417,8 +418,7 @@ qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj, } ha->optrom_region_start = start; - ha->optrom_region_size = start + size > ha->optrom_size ? - ha->optrom_size - start : size; + ha->optrom_region_size = start + size; ha->optrom_state = QLA_SWRITING; ha->optrom_buffer = vmalloc(ha->optrom_region_size); diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index 3dfb54abc874..f8ae70476b3a 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -74,7 +74,7 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha) * ensures no active vp_list traversal while the vport is removed * from the queue) */ - wait_event_timeout(vha->vref_waitq, atomic_read(&vha->vref_count), + wait_event_timeout(vha->vref_waitq, !atomic_read(&vha->vref_count), 10*HZ); spin_lock_irqsave(&ha->vport_slock, flags); diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 42bca619f854..c39551b32e94 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -3696,7 +3696,7 @@ iscsi_if_rx(struct sk_buff *skb) uint32_t group; nlh = nlmsg_hdr(skb); - if (nlh->nlmsg_len < sizeof(*nlh) || + if (nlh->nlmsg_len < sizeof(*nlh) + sizeof(*ev) || skb->len < nlh->nlmsg_len) { break; } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 13ac7e57a35d..09fa1fd0c4ce 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2867,8 +2867,6 @@ static int sd_revalidate_disk(struct gendisk *disk) sd_read_write_same(sdkp, buffer); } - sdkp->first_scan = 0; - /* * We now have all cache related info, determine how we deal * with flush requests. @@ -2883,7 +2881,7 @@ static int sd_revalidate_disk(struct gendisk *disk) q->limits.max_dev_sectors = logical_to_sectors(sdp, dev_max); /* - * Use the device's preferred I/O size for reads and writes + * Determine the device's preferred I/O size for reads and writes * unless the reported value is unreasonably small, large, or * garbage. */ @@ -2897,8 +2895,19 @@ static int sd_revalidate_disk(struct gendisk *disk) rw_max = min_not_zero(logical_to_sectors(sdp, dev_max), (sector_t)BLK_DEF_MAX_SECTORS); - /* Combine with controller limits */ - q->limits.max_sectors = min(rw_max, queue_max_hw_sectors(q)); + /* Do not exceed controller limit */ + rw_max = min(rw_max, queue_max_hw_sectors(q)); + + /* + * Only update max_sectors if previously unset or if the current value + * exceeds the capabilities of the hardware. + */ + if (sdkp->first_scan || + q->limits.max_sectors > q->limits.max_dev_sectors || + q->limits.max_sectors > q->limits.max_hw_sectors) + q->limits.max_sectors = rw_max; + + sdkp->first_scan = 0; set_capacity(disk, logical_to_sectors(sdp, sdkp->capacity)); sd_config_write_same(sdkp); diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 9236a13d5d2a..02dfbc1373e3 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -122,7 +122,7 @@ struct sg_device; /* forward declarations */ struct sg_fd; typedef struct sg_request { /* SG_MAX_QUEUE requests outstanding per file */ - struct sg_request *nextrp; /* NULL -> tail request (slist) */ + struct list_head entry; /* list entry */ struct sg_fd *parentfp; /* NULL -> not in use */ Sg_scatter_hold data; /* hold buffer, perhaps scatter list */ sg_io_hdr_t header; /* scsi command+info, see */ @@ -146,8 +146,7 @@ typedef struct sg_fd { /* holds the state of a file descriptor */ int timeout; /* defaults to SG_DEFAULT_TIMEOUT */ int timeout_user; /* defaults to SG_DEFAULT_TIMEOUT_USER */ Sg_scatter_hold reserve; /* buffer held for this file descriptor */ - unsigned save_scat_len; /* original length of trunc. scat. element */ - Sg_request *headrp; /* head of request slist, NULL->empty */ + struct list_head rq_list; /* head of request list */ struct fasync_struct *async_qp; /* used by asynchronous notification */ Sg_request req_arr[SG_MAX_QUEUE]; /* used as singly-linked list */ char low_dma; /* as in parent but possibly overridden to 1 */ @@ -829,6 +828,39 @@ static int max_sectors_bytes(struct request_queue *q) return max_sectors << 9; } +static void +sg_fill_request_table(Sg_fd *sfp, sg_req_info_t *rinfo) +{ + Sg_request *srp; + int val; + unsigned int ms; + + val = 0; + list_for_each_entry(srp, &sfp->rq_list, entry) { + if (val > SG_MAX_QUEUE) + break; + rinfo[val].req_state = srp->done + 1; + rinfo[val].problem = + srp->header.masked_status & + srp->header.host_status & + srp->header.driver_status; + if (srp->done) + rinfo[val].duration = + srp->header.duration; + else { + ms = jiffies_to_msecs(jiffies); + rinfo[val].duration = + (ms > srp->header.duration) ? + (ms - srp->header.duration) : 0; + } + rinfo[val].orphan = srp->orphan; + rinfo[val].sg_io_owned = srp->sg_io_owned; + rinfo[val].pack_id = srp->header.pack_id; + rinfo[val].usr_ptr = srp->header.usr_ptr; + val++; + } +} + static long sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) { @@ -941,7 +973,7 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) if (!access_ok(VERIFY_WRITE, ip, sizeof (int))) return -EFAULT; read_lock_irqsave(&sfp->rq_list_lock, iflags); - for (srp = sfp->headrp; srp; srp = srp->nextrp) { + list_for_each_entry(srp, &sfp->rq_list, entry) { if ((1 == srp->done) && (!srp->sg_io_owned)) { read_unlock_irqrestore(&sfp->rq_list_lock, iflags); @@ -954,7 +986,8 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) return 0; case SG_GET_NUM_WAITING: read_lock_irqsave(&sfp->rq_list_lock, iflags); - for (val = 0, srp = sfp->headrp; srp; srp = srp->nextrp) { + val = 0; + list_for_each_entry(srp, &sfp->rq_list, entry) { if ((1 == srp->done) && (!srp->sg_io_owned)) ++val; } @@ -1022,42 +1055,15 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) return -EFAULT; else { sg_req_info_t *rinfo; - unsigned int ms; - rinfo = kmalloc(SZ_SG_REQ_INFO * SG_MAX_QUEUE, - GFP_KERNEL); + rinfo = kzalloc(SZ_SG_REQ_INFO * SG_MAX_QUEUE, + GFP_KERNEL); if (!rinfo) return -ENOMEM; read_lock_irqsave(&sfp->rq_list_lock, iflags); - for (srp = sfp->headrp, val = 0; val < SG_MAX_QUEUE; - ++val, srp = srp ? srp->nextrp : srp) { - memset(&rinfo[val], 0, SZ_SG_REQ_INFO); - if (srp) { - rinfo[val].req_state = srp->done + 1; - rinfo[val].problem = - srp->header.masked_status & - srp->header.host_status & - srp->header.driver_status; - if (srp->done) - rinfo[val].duration = - srp->header.duration; - else { - ms = jiffies_to_msecs(jiffies); - rinfo[val].duration = - (ms > srp->header.duration) ? - (ms - srp->header.duration) : 0; - } - rinfo[val].orphan = srp->orphan; - rinfo[val].sg_io_owned = - srp->sg_io_owned; - rinfo[val].pack_id = - srp->header.pack_id; - rinfo[val].usr_ptr = - srp->header.usr_ptr; - } - } + sg_fill_request_table(sfp, rinfo); read_unlock_irqrestore(&sfp->rq_list_lock, iflags); - result = __copy_to_user(p, rinfo, + result = __copy_to_user(p, rinfo, SZ_SG_REQ_INFO * SG_MAX_QUEUE); result = result ? -EFAULT : 0; kfree(rinfo); @@ -1163,7 +1169,7 @@ sg_poll(struct file *filp, poll_table * wait) return POLLERR; poll_wait(filp, &sfp->read_wait, wait); read_lock_irqsave(&sfp->rq_list_lock, iflags); - for (srp = sfp->headrp; srp; srp = srp->nextrp) { + list_for_each_entry(srp, &sfp->rq_list, entry) { /* if any read waiting, flag it */ if ((0 == res) && (1 == srp->done) && (!srp->sg_io_owned)) res = POLLIN | POLLRDNORM; @@ -2049,7 +2055,6 @@ sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp) req_schp->pages = NULL; req_schp->page_order = 0; req_schp->sglist_len = 0; - sfp->save_scat_len = 0; srp->res_used = 0; /* Called without mutex lock to avoid deadlock */ sfp->res_in_use = 0; @@ -2062,7 +2067,7 @@ sg_get_rq_mark(Sg_fd * sfp, int pack_id) unsigned long iflags; write_lock_irqsave(&sfp->rq_list_lock, iflags); - for (resp = sfp->headrp; resp; resp = resp->nextrp) { + list_for_each_entry(resp, &sfp->rq_list, entry) { /* look for requests that are ready + not SG_IO owned */ if ((1 == resp->done) && (!resp->sg_io_owned) && ((-1 == pack_id) || (resp->header.pack_id == pack_id))) { @@ -2080,70 +2085,45 @@ sg_add_request(Sg_fd * sfp) { int k; unsigned long iflags; - Sg_request *resp; Sg_request *rp = sfp->req_arr; write_lock_irqsave(&sfp->rq_list_lock, iflags); - resp = sfp->headrp; - if (!resp) { - memset(rp, 0, sizeof (Sg_request)); - rp->parentfp = sfp; - resp = rp; - sfp->headrp = resp; - } else { - if (0 == sfp->cmd_q) - resp = NULL; /* command queuing disallowed */ - else { - for (k = 0; k < SG_MAX_QUEUE; ++k, ++rp) { - if (!rp->parentfp) - break; - } - if (k < SG_MAX_QUEUE) { - memset(rp, 0, sizeof (Sg_request)); - rp->parentfp = sfp; - while (resp->nextrp) - resp = resp->nextrp; - resp->nextrp = rp; - resp = rp; - } else - resp = NULL; + if (!list_empty(&sfp->rq_list)) { + if (!sfp->cmd_q) + goto out_unlock; + + for (k = 0; k < SG_MAX_QUEUE; ++k, ++rp) { + if (!rp->parentfp) + break; } + if (k >= SG_MAX_QUEUE) + goto out_unlock; } - if (resp) { - resp->nextrp = NULL; - resp->header.duration = jiffies_to_msecs(jiffies); - } + memset(rp, 0, sizeof (Sg_request)); + rp->parentfp = sfp; + rp->header.duration = jiffies_to_msecs(jiffies); + list_add_tail(&rp->entry, &sfp->rq_list); write_unlock_irqrestore(&sfp->rq_list_lock, iflags); - return resp; + return rp; +out_unlock: + write_unlock_irqrestore(&sfp->rq_list_lock, iflags); + return NULL; } /* Return of 1 for found; 0 for not found */ static int sg_remove_request(Sg_fd * sfp, Sg_request * srp) { - Sg_request *prev_rp; - Sg_request *rp; unsigned long iflags; int res = 0; - if ((!sfp) || (!srp) || (!sfp->headrp)) + if (!sfp || !srp || list_empty(&sfp->rq_list)) return res; write_lock_irqsave(&sfp->rq_list_lock, iflags); - prev_rp = sfp->headrp; - if (srp == prev_rp) { - sfp->headrp = prev_rp->nextrp; - prev_rp->parentfp = NULL; + if (!list_empty(&srp->entry)) { + list_del(&srp->entry); + srp->parentfp = NULL; res = 1; - } else { - while ((rp = prev_rp->nextrp)) { - if (srp == rp) { - prev_rp->nextrp = rp->nextrp; - rp->parentfp = NULL; - res = 1; - break; - } - prev_rp = rp; - } } write_unlock_irqrestore(&sfp->rq_list_lock, iflags); return res; @@ -2162,7 +2142,7 @@ sg_add_sfp(Sg_device * sdp) init_waitqueue_head(&sfp->read_wait); rwlock_init(&sfp->rq_list_lock); - + INIT_LIST_HEAD(&sfp->rq_list); kref_init(&sfp->f_ref); mutex_init(&sfp->f_mutex); sfp->timeout = SG_DEFAULT_TIMEOUT; @@ -2203,10 +2183,13 @@ sg_remove_sfp_usercontext(struct work_struct *work) { struct sg_fd *sfp = container_of(work, struct sg_fd, ew.work); struct sg_device *sdp = sfp->parentdp; + Sg_request *srp; /* Cleanup any responses which were never read(). */ - while (sfp->headrp) - sg_finish_rem_req(sfp->headrp); + while (!list_empty(&sfp->rq_list)) { + srp = list_first_entry(&sfp->rq_list, Sg_request, entry); + sg_finish_rem_req(srp); + } if (sfp->reserve.bufflen > 0) { SCSI_LOG_TIMEOUT(6, sg_printk(KERN_INFO, sdp, @@ -2609,7 +2592,7 @@ static int sg_proc_seq_show_devstrs(struct seq_file *s, void *v) /* must be called while holding sg_index_lock */ static void sg_proc_debug_helper(struct seq_file *s, Sg_device * sdp) { - int k, m, new_interface, blen, usg; + int k, new_interface, blen, usg; Sg_request *srp; Sg_fd *fp; const sg_io_hdr_t *hp; @@ -2629,13 +2612,11 @@ static void sg_proc_debug_helper(struct seq_file *s, Sg_device * sdp) seq_printf(s, " cmd_q=%d f_packid=%d k_orphan=%d closed=0\n", (int) fp->cmd_q, (int) fp->force_packid, (int) fp->keep_orphan); - for (m = 0, srp = fp->headrp; - srp != NULL; - ++m, srp = srp->nextrp) { + list_for_each_entry(srp, &fp->rq_list, entry) { hp = &srp->header; new_interface = (hp->interface_id == '\0') ? 0 : 1; if (srp->res_used) { - if (new_interface && + if (new_interface && (SG_FLAG_MMAP_IO & hp->flags)) cp = " mmap>> "; else @@ -2666,7 +2647,7 @@ static void sg_proc_debug_helper(struct seq_file *s, Sg_device * sdp) seq_printf(s, "ms sgat=%d op=0x%02x\n", usg, (int) srp->data.cmd_opcode); } - if (0 == m) + if (list_empty(&fp->rq_list)) seq_puts(s, " No requests active\n"); read_unlock(&fp->rq_list_lock); } diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index c5ab1b0037fc..2bf96d33428a 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1559,6 +1559,8 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) ret = storvsc_do_io(dev, cmd_request); if (ret == -EAGAIN) { + if (payload_sz > sizeof(cmd_request->mpb)) + kfree(payload); /* no more space */ return SCSI_MLQUEUE_DEVICE_BUSY; } diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index d6239fa718be..3f3751e2b521 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -1458,6 +1458,10 @@ static const struct pci_device_id pxa2xx_spi_pci_compound_match[] = { { PCI_VDEVICE(INTEL, 0x1ac2), LPSS_BXT_SSP }, { PCI_VDEVICE(INTEL, 0x1ac4), LPSS_BXT_SSP }, { PCI_VDEVICE(INTEL, 0x1ac6), LPSS_BXT_SSP }, + /* GLK */ + { PCI_VDEVICE(INTEL, 0x31c2), LPSS_BXT_SSP }, + { PCI_VDEVICE(INTEL, 0x31c4), LPSS_BXT_SSP }, + { PCI_VDEVICE(INTEL, 0x31c6), LPSS_BXT_SSP }, /* APL */ { PCI_VDEVICE(INTEL, 0x5ac2), LPSS_BXT_SSP }, { PCI_VDEVICE(INTEL, 0x5ac4), LPSS_BXT_SSP }, diff --git a/drivers/staging/iio/adc/ad7192.c b/drivers/staging/iio/adc/ad7192.c index 1cf6b79801a9..eeacb0e55db7 100644 --- a/drivers/staging/iio/adc/ad7192.c +++ b/drivers/staging/iio/adc/ad7192.c @@ -222,11 +222,9 @@ static int ad7192_setup(struct ad7192_state *st, struct iio_dev *indio_dev = spi_get_drvdata(st->sd.spi); unsigned long long scale_uv; int i, ret, id; - u8 ones[6]; /* reset the serial interface */ - memset(&ones, 0xFF, 6); - ret = spi_write(st->sd.spi, &ones, 6); + ret = ad_sd_reset(&st->sd, 48); if (ret < 0) goto out; usleep_range(500, 1000); /* Wait for at least 500us */ diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c index 1091b9f1dd07..f72eebc71dd8 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c @@ -106,8 +106,14 @@ int vchiq_platform_init(struct platform_device *pdev, VCHIQ_STATE_T *state) g_virt_to_bus_offset = virt_to_dma(dev, (void *)0); - (void)of_property_read_u32(dev->of_node, "cache-line-size", + err = of_property_read_u32(dev->of_node, "cache-line-size", &g_cache_line_size); + + if (err) { + dev_err(dev, "Missing cache-line-size property\n"); + return -ENODEV; + } + g_fragments_size = 2 * g_cache_line_size; /* Allocate space for the channels in coherent memory */ @@ -538,18 +544,20 @@ free_pagelist(PAGELIST_T *pagelist, int actual) if (head_bytes > actual) head_bytes = actual; - memcpy((char *)page_address(pages[0]) + + memcpy((char *)kmap(pages[0]) + pagelist->offset, fragments, head_bytes); + kunmap(pages[0]); } if ((actual >= 0) && (head_bytes < actual) && (tail_bytes != 0)) { - memcpy((char *)page_address(pages[num_pages - 1]) + + memcpy((char *)kmap(pages[num_pages - 1]) + ((pagelist->offset + actual) & (PAGE_SIZE - 1) & ~(g_cache_line_size - 1)), fragments + g_cache_line_size, tail_bytes); + kunmap(pages[num_pages - 1]); } down(&g_free_fragments_mutex); diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c index efc453ef6831..ab92a1bc9666 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.c +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -44,10 +44,8 @@ void iscsit_set_dataout_sequence_values( */ if (cmd->unsolicited_data) { cmd->seq_start_offset = cmd->write_data_done; - cmd->seq_end_offset = (cmd->write_data_done + - ((cmd->se_cmd.data_length > - conn->sess->sess_ops->FirstBurstLength) ? - conn->sess->sess_ops->FirstBurstLength : cmd->se_cmd.data_length)); + cmd->seq_end_offset = min(cmd->se_cmd.data_length, + conn->sess->sess_ops->FirstBurstLength); return; } diff --git a/drivers/tty/goldfish.c b/drivers/tty/goldfish.c index 3fc912373adf..996bd473dd03 100644 --- a/drivers/tty/goldfish.c +++ b/drivers/tty/goldfish.c @@ -300,7 +300,7 @@ static int goldfish_tty_probe(struct platform_device *pdev) return 0; err_tty_register_device_failed: - free_irq(irq, pdev); + free_irq(irq, qtty); err_request_irq_failed: goldfish_tty_current_line_count--; if (goldfish_tty_current_line_count == 0) diff --git a/drivers/tty/serial/8250/8250_moxa.c b/drivers/tty/serial/8250/8250_moxa.c index 26eb5393a263..d5069b2d4d79 100644 --- a/drivers/tty/serial/8250/8250_moxa.c +++ b/drivers/tty/serial/8250/8250_moxa.c @@ -68,6 +68,7 @@ static int moxa8250_probe(struct pci_dev *pdev, const struct pci_device_id *id) sizeof(unsigned int) * nr_ports, GFP_KERNEL); if (!brd) return -ENOMEM; + brd->num_ports = nr_ports; memset(&uart, 0, sizeof(struct uart_8250_port)); diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index f24d3030b98c..1ef31e3ee4a1 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1751,8 +1751,6 @@ void serial8250_tx_chars(struct uart_8250_port *up) if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(port); - pr_debug("%s: THRE\n", __func__); - /* * With RPM enabled, we have to wait until the FIFO is empty before the * HW can go idle. So we get here once again with empty FIFO and disable @@ -1817,8 +1815,6 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) status = serial_port_in(port, UART_LSR); - pr_debug("%s: status = %x\n", __func__, status); - if (status & (UART_LSR_DR | UART_LSR_BI)) { if (!up->dma || handle_rx_dma(up, iir)) status = serial8250_rx_chars(up, status); diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c index 4e603d060e80..59828d819145 100644 --- a/drivers/tty/serial/sunhv.c +++ b/drivers/tty/serial/sunhv.c @@ -398,6 +398,12 @@ static struct uart_driver sunhv_reg = { static struct uart_port *sunhv_port; +void sunhv_migrate_hvcons_irq(int cpu) +{ + /* Migrate hvcons irq to param cpu */ + irq_force_affinity(sunhv_port->irq, cpumask_of(cpu)); +} + /* Copy 's' into the con_write_page, decoding "\n" into * "\r\n" along the way. We have to return two lengths * because the caller needs to know how much to advance diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index aa80dc94ddc2..c220c2c0893f 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -361,6 +361,32 @@ int tty_insert_flip_string_flags(struct tty_port *port, } EXPORT_SYMBOL(tty_insert_flip_string_flags); +/** + * __tty_insert_flip_char - Add one character to the tty buffer + * @port: tty port + * @ch: character + * @flag: flag byte + * + * Queue a single byte to the tty buffering, with an optional flag. + * This is the slow path of tty_insert_flip_char. + */ +int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag) +{ + struct tty_buffer *tb; + int flags = (flag == TTY_NORMAL) ? TTYB_NORMAL : 0; + + if (!__tty_buffer_request_room(port, 1, flags)) + return 0; + + tb = port->buf.tail; + if (~tb->flags & TTYB_NORMAL) + *flag_buf_ptr(tb, tb->used) = flag; + *char_buf_ptr(tb, tb->used++) = ch; + + return 1; +} +EXPORT_SYMBOL(__tty_insert_flip_char); + /** * tty_schedule_flip - push characters to ldisc * @port: tty port to push from diff --git a/drivers/usb/chipidea/otg.c b/drivers/usb/chipidea/otg.c index 0cf149edddd8..f36a1ac3bfbd 100644 --- a/drivers/usb/chipidea/otg.c +++ b/drivers/usb/chipidea/otg.c @@ -134,9 +134,9 @@ void ci_handle_vbus_change(struct ci_hdrc *ci) if (!ci->is_otg) return; - if (hw_read_otgsc(ci, OTGSC_BSV)) + if (hw_read_otgsc(ci, OTGSC_BSV) && !ci->vbus_active) usb_gadget_vbus_connect(&ci->gadget); - else + else if (!hw_read_otgsc(ci, OTGSC_BSV) && ci->vbus_active) usb_gadget_vbus_disconnect(&ci->gadget); } @@ -175,14 +175,21 @@ static void ci_handle_id_switch(struct ci_hdrc *ci) ci_role_stop(ci); - if (role == CI_ROLE_GADGET) + if (role == CI_ROLE_GADGET && + IS_ERR(ci->platdata->vbus_extcon.edev)) /* - * wait vbus lower than OTGSC_BSV before connecting - * to host + * Wait vbus lower than OTGSC_BSV before connecting + * to host. If connecting status is from an external + * connector instead of register, we don't need to + * care vbus on the board, since it will not affect + * external connector status. */ hw_wait_vbus_lower_bsv(ci); ci_role_start(ci, role); + /* vbus change may have already occurred */ + if (role == CI_ROLE_GADGET) + ci_handle_vbus_change(ci); } } /** diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index f16491c25e73..ea20b2cc189f 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1773,6 +1773,9 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0xfff0, 0x0100), /* DATECS FP-2000 */ .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ }, + { USB_DEVICE(0x09d8, 0x0320), /* Elatec GmbH TWN3 */ + .driver_info = NO_UNION_NORMAL, /* has misplaced union descriptor */ + }, { USB_DEVICE(0x2912, 0x0001), /* ATOL FPrint */ .driver_info = CLEAR_HALT_CONDITIONS, diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 0b845e550fbd..9f001659807a 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -194,8 +194,10 @@ static void wdm_in_callback(struct urb *urb) /* * only set a new error if there is no previous error. * Errors are only cleared during read/open + * Avoid propagating -EPIPE (stall) to userspace since it is + * better handled as an empty read */ - if (desc->rerr == 0) + if (desc->rerr == 0 && status != -EPIPE) desc->rerr = status; if (length + desc->length > desc->wMaxCommand) { diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index eef716bdc259..5008f71fb08d 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -638,15 +638,23 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx, } else if (header->bDescriptorType == USB_DT_INTERFACE_ASSOCIATION) { + struct usb_interface_assoc_descriptor *d; + + d = (struct usb_interface_assoc_descriptor *)header; + if (d->bLength < USB_DT_INTERFACE_ASSOCIATION_SIZE) { + dev_warn(ddev, + "config %d has an invalid interface association descriptor of length %d, skipping\n", + cfgno, d->bLength); + continue; + } + if (iad_num == USB_MAXIADS) { dev_warn(ddev, "found more Interface " "Association Descriptors " "than allocated for in " "configuration %d\n", cfgno); } else { - config->intf_assoc[iad_num] = - (struct usb_interface_assoc_descriptor - *)header; + config->intf_assoc[iad_num] = d; iad_num++; } @@ -847,7 +855,7 @@ int usb_get_configuration(struct usb_device *dev) } if (dev->quirks & USB_QUIRK_DELAY_INIT) - msleep(100); + msleep(200); result = usb_get_descriptor(dev, USB_DT_CONFIG, cfgno, bigbuffer, length); @@ -947,10 +955,12 @@ int usb_get_bos_descriptor(struct usb_device *dev) for (i = 0; i < num; i++) { buffer += length; cap = (struct usb_dev_cap_header *)buffer; - length = cap->bLength; - if (total_len < length) + if (total_len < sizeof(*cap) || total_len < cap->bLength) { + dev->bos->desc->bNumDeviceCaps = i; break; + } + length = cap->bLength; total_len -= length; if (cap->bDescriptorType != USB_DT_DEVICE_CAPABILITY) { diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 80d4ef31ba8d..706b3d6a7614 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2704,13 +2704,16 @@ static int hub_port_wait_reset(struct usb_hub *hub, int port1, if (!(portstatus & USB_PORT_STAT_CONNECTION)) return -ENOTCONN; - /* bomb out completely if the connection bounced. A USB 3.0 - * connection may bounce if multiple warm resets were issued, + /* Retry if connect change is set but status is still connected. + * A USB 3.0 connection may bounce if multiple warm resets were issued, * but the device may have successfully re-connected. Ignore it. */ if (!hub_is_superspeed(hub->hdev) && - (portchange & USB_PORT_STAT_C_CONNECTION)) - return -ENOTCONN; + (portchange & USB_PORT_STAT_C_CONNECTION)) { + usb_clear_port_feature(hub->hdev, port1, + USB_PORT_FEAT_C_CONNECTION); + return -EAGAIN; + } if (!(portstatus & USB_PORT_STAT_ENABLE)) return -EBUSY; @@ -4828,7 +4831,7 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, goto loop; if (udev->quirks & USB_QUIRK_DELAY_INIT) - msleep(1000); + msleep(2000); /* consecutive bus-powered hubs aren't reliable; they can * violate the voltage drop budget. if the new child has diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 3a4707746157..4c388451f31f 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -2068,6 +2068,10 @@ int cdc_parse_cdc_header(struct usb_cdc_parsed_header *hdr, elength = 1; goto next_desc; } + if ((buflen < elength) || (elength < 3)) { + dev_err(&intf->dev, "invalid descriptor buffer length\n"); + break; + } if (buffer[1] != USB_DT_CS_INTERFACE) { dev_err(&intf->dev, "skipping garbage\n"); goto next_desc; diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 82806e311202..a6aaf2f193a4 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -221,6 +221,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* Corsair Strafe RGB */ { USB_DEVICE(0x1b1c, 0x1b20), .driver_info = USB_QUIRK_DELAY_INIT }, + /* MIDI keyboard WORLDE MINI */ + { USB_DEVICE(0x1c75, 0x0204), .driver_info = + USB_QUIRK_CONFIG_INTF_STRINGS }, + /* Acer C120 LED Projector */ { USB_DEVICE(0x1de1, 0xc102), .driver_info = USB_QUIRK_NO_LPM }, diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index f92c680e3937..c61ddbf94bc7 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -817,9 +817,42 @@ static void dwc3_prepare_one_trb(struct dwc3_ep *dep, if (!node) { trb->ctrl = DWC3_TRBCTL_ISOCHRONOUS_FIRST; + /* + * USB Specification 2.0 Section 5.9.2 states that: "If + * there is only a single transaction in the microframe, + * only a DATA0 data packet PID is used. If there are + * two transactions per microframe, DATA1 is used for + * the first transaction data packet and DATA0 is used + * for the second transaction data packet. If there are + * three transactions per microframe, DATA2 is used for + * the first transaction data packet, DATA1 is used for + * the second, and DATA0 is used for the third." + * + * IOW, we should satisfy the following cases: + * + * 1) length <= maxpacket + * - DATA0 + * + * 2) maxpacket < length <= (2 * maxpacket) + * - DATA1, DATA0 + * + * 3) (2 * maxpacket) < length <= (3 * maxpacket) + * - DATA2, DATA1, DATA0 + */ if (speed == USB_SPEED_HIGH) { struct usb_ep *ep = &dep->endpoint; - trb->size |= DWC3_TRB_SIZE_PCM1(ep->mult - 1); + unsigned int mult = ep->mult - 1; + unsigned int maxp; + + maxp = usb_endpoint_maxp(ep->desc) & 0x07ff; + + if (length <= (2 * maxp)) + mult--; + + if (length <= maxp) + mult--; + + trb->size |= DWC3_TRB_SIZE_PCM1(mult); } } else { trb->ctrl = DWC3_TRBCTL_ISOCHRONOUS; diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 816c91cb6d1d..afc581a1aa3f 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -2024,6 +2024,8 @@ static DEVICE_ATTR_RO(suspended); static void __composite_unbind(struct usb_gadget *gadget, bool unbind_driver) { struct usb_composite_dev *cdev = get_gadget_data(gadget); + struct usb_gadget_strings *gstr = cdev->driver->strings[0]; + struct usb_string *dev_str = gstr->strings; /* composite_disconnect() must already have been called * by the underlying peripheral controller driver! @@ -2043,6 +2045,9 @@ static void __composite_unbind(struct usb_gadget *gadget, bool unbind_driver) composite_dev_cleanup(cdev); + if (dev_str[USB_GADGET_MANUFACTURER_IDX].s == cdev->def_manufacturer) + dev_str[USB_GADGET_MANUFACTURER_IDX].s = ""; + kfree(cdev->def_manufacturer); kfree(cdev); set_gadget_data(gadget, NULL); diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 1f5e71b27f1e..2ef4377005db 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1171,11 +1171,12 @@ static struct configfs_attribute *interf_grp_attrs[] = { NULL }; -int usb_os_desc_prepare_interf_dir(struct config_group *parent, - int n_interf, - struct usb_os_desc **desc, - char **names, - struct module *owner) +struct config_group *usb_os_desc_prepare_interf_dir( + struct config_group *parent, + int n_interf, + struct usb_os_desc **desc, + char **names, + struct module *owner) { struct config_group *os_desc_group; struct config_item_type *os_desc_type, *interface_type; @@ -1187,7 +1188,7 @@ int usb_os_desc_prepare_interf_dir(struct config_group *parent, char *vlabuf = kzalloc(vla_group_size(data_chunk), GFP_KERNEL); if (!vlabuf) - return -ENOMEM; + return ERR_PTR(-ENOMEM); os_desc_group = vla_ptr(vlabuf, data_chunk, os_desc_group); os_desc_type = vla_ptr(vlabuf, data_chunk, os_desc_type); @@ -1212,7 +1213,7 @@ int usb_os_desc_prepare_interf_dir(struct config_group *parent, configfs_add_default_group(&d->group, os_desc_group); } - return 0; + return os_desc_group; } EXPORT_SYMBOL(usb_os_desc_prepare_interf_dir); diff --git a/drivers/usb/gadget/configfs.h b/drivers/usb/gadget/configfs.h index 36c468c4f5e9..540d5e92ed22 100644 --- a/drivers/usb/gadget/configfs.h +++ b/drivers/usb/gadget/configfs.h @@ -5,11 +5,12 @@ void unregister_gadget_item(struct config_item *item); -int usb_os_desc_prepare_interf_dir(struct config_group *parent, - int n_interf, - struct usb_os_desc **desc, - char **names, - struct module *owner); +struct config_group *usb_os_desc_prepare_interf_dir( + struct config_group *parent, + int n_interf, + struct usb_os_desc **desc, + char **names, + struct module *owner); static inline struct usb_os_desc *to_usb_os_desc(struct config_item *item) { diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index ccd93c9e26ab..d2fc237cd87a 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -306,8 +306,6 @@ struct fsg_common { struct completion thread_notifier; struct task_struct *thread_task; - /* Callback functions. */ - const struct fsg_operations *ops; /* Gadget's private data. */ void *private_data; @@ -2505,6 +2503,7 @@ static void handle_exception(struct fsg_common *common) static int fsg_main_thread(void *common_) { struct fsg_common *common = common_; + int i; /* * Allow the thread to be killed by a signal, but set the signal mask @@ -2566,21 +2565,16 @@ static int fsg_main_thread(void *common_) common->thread_task = NULL; spin_unlock_irq(&common->lock); - if (!common->ops || !common->ops->thread_exits - || common->ops->thread_exits(common) < 0) { - int i; + /* Eject media from all LUNs */ - down_write(&common->filesem); - for (i = 0; i < ARRAY_SIZE(common->luns); --i) { - struct fsg_lun *curlun = common->luns[i]; - if (!curlun || !fsg_lun_is_open(curlun)) - continue; + down_write(&common->filesem); + for (i = 0; i < ARRAY_SIZE(common->luns); i++) { + struct fsg_lun *curlun = common->luns[i]; + if (curlun && fsg_lun_is_open(curlun)) fsg_lun_close(curlun); - curlun->unit_attention_data = SS_MEDIUM_NOT_PRESENT; - } - up_write(&common->filesem); } + up_write(&common->filesem); /* Let fsg_unbind() know the thread has exited */ complete_and_exit(&common->thread_notifier, 0); @@ -2770,13 +2764,6 @@ void fsg_common_remove_luns(struct fsg_common *common) } EXPORT_SYMBOL_GPL(fsg_common_remove_luns); -void fsg_common_set_ops(struct fsg_common *common, - const struct fsg_operations *ops) -{ - common->ops = ops; -} -EXPORT_SYMBOL_GPL(fsg_common_set_ops); - void fsg_common_free_buffers(struct fsg_common *common) { _fsg_common_free_buffers(common->buffhds, common->fsg_num_buffers); diff --git a/drivers/usb/gadget/function/f_mass_storage.h b/drivers/usb/gadget/function/f_mass_storage.h index d3902313b8ac..dc05ca0c4359 100644 --- a/drivers/usb/gadget/function/f_mass_storage.h +++ b/drivers/usb/gadget/function/f_mass_storage.h @@ -60,17 +60,6 @@ struct fsg_module_parameters { struct fsg_common; /* FSF callback functions */ -struct fsg_operations { - /* - * Callback function to call when thread exits. If no - * callback is set or it returns value lower then zero MSF - * will force eject all LUNs it operates on (including those - * marked as non-removable or with prevent_medium_removal flag - * set). - */ - int (*thread_exits)(struct fsg_common *common); -}; - struct fsg_lun_opts { struct config_group group; struct fsg_lun *lun; @@ -142,9 +131,6 @@ void fsg_common_remove_lun(struct fsg_lun *lun); void fsg_common_remove_luns(struct fsg_common *common); -void fsg_common_set_ops(struct fsg_common *common, - const struct fsg_operations *ops); - int fsg_common_create_lun(struct fsg_common *common, struct fsg_lun_config *cfg, unsigned int id, const char *name, const char **name_pfx); diff --git a/drivers/usb/gadget/function/f_rndis.c b/drivers/usb/gadget/function/f_rndis.c index 16562e461121..ba00cdb809d6 100644 --- a/drivers/usb/gadget/function/f_rndis.c +++ b/drivers/usb/gadget/function/f_rndis.c @@ -892,6 +892,7 @@ static void rndis_free_inst(struct usb_function_instance *f) free_netdev(opts->net); } + kfree(opts->rndis_interf_group); /* single VLA chunk */ kfree(opts); } @@ -900,6 +901,7 @@ static struct usb_function_instance *rndis_alloc_inst(void) struct f_rndis_opts *opts; struct usb_os_desc *descs[1]; char *names[1]; + struct config_group *rndis_interf_group; opts = kzalloc(sizeof(*opts), GFP_KERNEL); if (!opts) @@ -920,8 +922,14 @@ static struct usb_function_instance *rndis_alloc_inst(void) names[0] = "rndis"; config_group_init_type_name(&opts->func_inst.group, "", &rndis_func_type); - usb_os_desc_prepare_interf_dir(&opts->func_inst.group, 1, descs, - names, THIS_MODULE); + rndis_interf_group = + usb_os_desc_prepare_interf_dir(&opts->func_inst.group, 1, descs, + names, THIS_MODULE); + if (IS_ERR(rndis_interf_group)) { + rndis_free_inst(&opts->func_inst); + return ERR_CAST(rndis_interf_group); + } + opts->rndis_interf_group = rndis_interf_group; return &opts->func_inst; } diff --git a/drivers/usb/gadget/function/u_rndis.h b/drivers/usb/gadget/function/u_rndis.h index 4eafd5050545..4e2ad04fe8d6 100644 --- a/drivers/usb/gadget/function/u_rndis.h +++ b/drivers/usb/gadget/function/u_rndis.h @@ -26,6 +26,7 @@ struct f_rndis_opts { bool bound; bool borrowed_net; + struct config_group *rndis_interf_group; struct usb_os_desc rndis_os_desc; char rndis_ext_compat_id[16]; diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index f959c42ecace..f69dbd4bcd18 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -27,7 +27,7 @@ #include #include #include - +#include #include #include @@ -116,6 +116,7 @@ enum ep0_state { struct dev_data { spinlock_t lock; atomic_t count; + int udc_usage; enum ep0_state state; /* P: lock */ struct usb_gadgetfs_event event [N_EVENT]; unsigned ev_next; @@ -513,9 +514,9 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req) INIT_WORK(&priv->work, ep_user_copy_worker); schedule_work(&priv->work); } - spin_unlock(&epdata->dev->lock); usb_ep_free_request(ep, req); + spin_unlock(&epdata->dev->lock); put_ep(epdata); } @@ -939,9 +940,11 @@ ep0_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr) struct usb_request *req = dev->req; if ((retval = setup_req (ep, req, 0)) == 0) { + ++dev->udc_usage; spin_unlock_irq (&dev->lock); retval = usb_ep_queue (ep, req, GFP_KERNEL); spin_lock_irq (&dev->lock); + --dev->udc_usage; } dev->state = STATE_DEV_CONNECTED; @@ -983,11 +986,14 @@ ep0_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr) retval = -EIO; else { len = min (len, (size_t)dev->req->actual); -// FIXME don't call this with the spinlock held ... + ++dev->udc_usage; + spin_unlock_irq(&dev->lock); if (copy_to_user (buf, dev->req->buf, len)) retval = -EFAULT; else retval = len; + spin_lock_irq(&dev->lock); + --dev->udc_usage; clean_req (dev->gadget->ep0, dev->req); /* NOTE userspace can't yet choose to stall */ } @@ -1131,6 +1137,7 @@ ep0_write (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) retval = setup_req (dev->gadget->ep0, dev->req, len); if (retval == 0) { dev->state = STATE_DEV_CONNECTED; + ++dev->udc_usage; spin_unlock_irq (&dev->lock); if (copy_from_user (dev->req->buf, buf, len)) retval = -EFAULT; @@ -1142,6 +1149,7 @@ ep0_write (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) GFP_KERNEL); } spin_lock_irq(&dev->lock); + --dev->udc_usage; if (retval < 0) { clean_req (dev->gadget->ep0, dev->req); } else @@ -1243,9 +1251,21 @@ static long dev_ioctl (struct file *fd, unsigned code, unsigned long value) struct usb_gadget *gadget = dev->gadget; long ret = -ENOTTY; - if (gadget->ops->ioctl) + spin_lock_irq(&dev->lock); + if (dev->state == STATE_DEV_OPENED || + dev->state == STATE_DEV_UNBOUND) { + /* Not bound to a UDC */ + } else if (gadget->ops->ioctl) { + ++dev->udc_usage; + spin_unlock_irq(&dev->lock); + ret = gadget->ops->ioctl (gadget, code, value); + spin_lock_irq(&dev->lock); + --dev->udc_usage; + } + spin_unlock_irq(&dev->lock); + return ret; } @@ -1463,10 +1483,12 @@ delegate: if (value < 0) break; + ++dev->udc_usage; spin_unlock (&dev->lock); value = usb_ep_queue (gadget->ep0, dev->req, GFP_KERNEL); spin_lock (&dev->lock); + --dev->udc_usage; if (value < 0) { clean_req (gadget->ep0, dev->req); break; @@ -1490,8 +1512,12 @@ delegate: req->length = value; req->zero = value < w_length; + ++dev->udc_usage; spin_unlock (&dev->lock); value = usb_ep_queue (gadget->ep0, req, GFP_KERNEL); + spin_lock(&dev->lock); + --dev->udc_usage; + spin_unlock(&dev->lock); if (value < 0) { DBG (dev, "ep_queue --> %d\n", value); req->status = 0; @@ -1518,21 +1544,24 @@ static void destroy_ep_files (struct dev_data *dev) /* break link to FS */ ep = list_first_entry (&dev->epfiles, struct ep_data, epfiles); list_del_init (&ep->epfiles); + spin_unlock_irq (&dev->lock); + dentry = ep->dentry; ep->dentry = NULL; parent = d_inode(dentry->d_parent); /* break link to controller */ + mutex_lock(&ep->lock); if (ep->state == STATE_EP_ENABLED) (void) usb_ep_disable (ep->ep); ep->state = STATE_EP_UNBOUND; usb_ep_free_request (ep->ep, ep->req); ep->ep = NULL; + mutex_unlock(&ep->lock); + wake_up (&ep->wait); put_ep (ep); - spin_unlock_irq (&dev->lock); - /* break link to dcache */ inode_lock(parent); d_delete (dentry); @@ -1603,6 +1632,11 @@ gadgetfs_unbind (struct usb_gadget *gadget) spin_lock_irq (&dev->lock); dev->state = STATE_DEV_UNBOUND; + while (dev->udc_usage > 0) { + spin_unlock_irq(&dev->lock); + usleep_range(1000, 2000); + spin_lock_irq(&dev->lock); + } spin_unlock_irq (&dev->lock); destroy_ep_files (dev); diff --git a/drivers/usb/gadget/legacy/mass_storage.c b/drivers/usb/gadget/legacy/mass_storage.c index 125974f32f50..fcba59782f26 100644 --- a/drivers/usb/gadget/legacy/mass_storage.c +++ b/drivers/usb/gadget/legacy/mass_storage.c @@ -107,15 +107,6 @@ static unsigned int fsg_num_buffers = CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS; FSG_MODULE_PARAMETERS(/* no prefix */, mod_data); -static unsigned long msg_registered; -static void msg_cleanup(void); - -static int msg_thread_exits(struct fsg_common *common) -{ - msg_cleanup(); - return 0; -} - static int msg_do_config(struct usb_configuration *c) { struct fsg_opts *opts; @@ -154,9 +145,6 @@ static struct usb_configuration msg_config_driver = { static int msg_bind(struct usb_composite_dev *cdev) { - static const struct fsg_operations ops = { - .thread_exits = msg_thread_exits, - }; struct fsg_opts *opts; struct fsg_config config; int status; @@ -173,8 +161,6 @@ static int msg_bind(struct usb_composite_dev *cdev) if (status) goto fail; - fsg_common_set_ops(opts->common, &ops); - status = fsg_common_set_cdev(opts->common, cdev, config.can_stall); if (status) goto fail_set_cdev; @@ -210,7 +196,6 @@ static int msg_bind(struct usb_composite_dev *cdev) usb_composite_overwrite_options(cdev, &coverwrite); dev_info(&cdev->gadget->dev, DRIVER_DESC ", version: " DRIVER_VERSION "\n"); - set_bit(0, &msg_registered); return 0; fail_otg_desc: @@ -261,9 +246,8 @@ static int __init msg_init(void) } module_init(msg_init); -static void msg_cleanup(void) +static void __exit msg_cleanup(void) { - if (test_and_clear_bit(0, &msg_registered)) - usb_composite_unregister(&msg_driver); + usb_composite_unregister(&msg_driver); } module_exit(msg_cleanup); diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c index a95b3e75f750..ad8402906f77 100644 --- a/drivers/usb/gadget/udc/atmel_usba_udc.c +++ b/drivers/usb/gadget/udc/atmel_usba_udc.c @@ -28,6 +28,8 @@ #include #include "atmel_usba_udc.h" +#define USBA_VBUS_IRQFLAGS (IRQF_ONESHOT \ + | IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING) #ifdef CONFIG_USB_GADGET_DEBUG_FS #include @@ -2172,7 +2174,7 @@ static int usba_udc_probe(struct platform_device *pdev) IRQ_NOAUTOEN); ret = devm_request_threaded_irq(&pdev->dev, gpio_to_irq(udc->vbus_pin), NULL, - usba_vbus_irq_thread, IRQF_ONESHOT, + usba_vbus_irq_thread, USBA_VBUS_IRQFLAGS, "atmel_usba_udc", udc); if (ret) { udc->vbus_pin = -ENODEV; diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index 94c8a9f6cbf1..b62a3de65075 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -237,6 +237,8 @@ struct dummy_hcd { struct usb_device *udev; struct list_head urbp_list; + struct urbp *next_frame_urbp; + u32 stream_en_ep; u8 num_stream[30 / 2]; @@ -253,11 +255,13 @@ struct dummy { */ struct dummy_ep ep[DUMMY_ENDPOINTS]; int address; + int callback_usage; struct usb_gadget gadget; struct usb_gadget_driver *driver; struct dummy_request fifo_req; u8 fifo_buf[FIFO_SIZE]; u16 devstatus; + unsigned ints_enabled:1; unsigned udc_suspended:1; unsigned pullup:1; @@ -416,6 +420,7 @@ static void set_link_state_by_speed(struct dummy_hcd *dum_hcd) static void set_link_state(struct dummy_hcd *dum_hcd) { struct dummy *dum = dum_hcd->dum; + unsigned int power_bit; dum_hcd->active = 0; if (dum->pullup) @@ -426,32 +431,43 @@ static void set_link_state(struct dummy_hcd *dum_hcd) return; set_link_state_by_speed(dum_hcd); + power_bit = (dummy_hcd_to_hcd(dum_hcd)->speed == HCD_USB3 ? + USB_SS_PORT_STAT_POWER : USB_PORT_STAT_POWER); if ((dum_hcd->port_status & USB_PORT_STAT_ENABLE) == 0 || dum_hcd->active) dum_hcd->resuming = 0; /* Currently !connected or in reset */ - if ((dum_hcd->port_status & USB_PORT_STAT_CONNECTION) == 0 || + if ((dum_hcd->port_status & power_bit) == 0 || (dum_hcd->port_status & USB_PORT_STAT_RESET) != 0) { - unsigned disconnect = USB_PORT_STAT_CONNECTION & + unsigned int disconnect = power_bit & dum_hcd->old_status & (~dum_hcd->port_status); - unsigned reset = USB_PORT_STAT_RESET & + unsigned int reset = USB_PORT_STAT_RESET & (~dum_hcd->old_status) & dum_hcd->port_status; /* Report reset and disconnect events to the driver */ - if (dum->driver && (disconnect || reset)) { + if (dum->ints_enabled && (disconnect || reset)) { stop_activity(dum); + ++dum->callback_usage; + spin_unlock(&dum->lock); if (reset) usb_gadget_udc_reset(&dum->gadget, dum->driver); else dum->driver->disconnect(&dum->gadget); + spin_lock(&dum->lock); + --dum->callback_usage; } - } else if (dum_hcd->active != dum_hcd->old_active) { + } else if (dum_hcd->active != dum_hcd->old_active && + dum->ints_enabled) { + ++dum->callback_usage; + spin_unlock(&dum->lock); if (dum_hcd->old_active && dum->driver->suspend) dum->driver->suspend(&dum->gadget); else if (!dum_hcd->old_active && dum->driver->resume) dum->driver->resume(&dum->gadget); + spin_lock(&dum->lock); + --dum->callback_usage; } dum_hcd->old_status = dum_hcd->port_status; @@ -965,8 +981,11 @@ static int dummy_udc_start(struct usb_gadget *g, * can't enumerate without help from the driver we're binding. */ + spin_lock_irq(&dum->lock); dum->devstatus = 0; dum->driver = driver; + dum->ints_enabled = 1; + spin_unlock_irq(&dum->lock); return 0; } @@ -977,6 +996,16 @@ static int dummy_udc_stop(struct usb_gadget *g) struct dummy *dum = dum_hcd->dum; spin_lock_irq(&dum->lock); + dum->ints_enabled = 0; + stop_activity(dum); + + /* emulate synchronize_irq(): wait for callbacks to finish */ + while (dum->callback_usage > 0) { + spin_unlock_irq(&dum->lock); + usleep_range(1000, 2000); + spin_lock_irq(&dum->lock); + } + dum->driver = NULL; spin_unlock_irq(&dum->lock); @@ -1030,7 +1059,12 @@ static int dummy_udc_probe(struct platform_device *pdev) memzero_explicit(&dum->gadget, sizeof(struct usb_gadget)); dum->gadget.name = gadget_name; dum->gadget.ops = &dummy_ops; - dum->gadget.max_speed = USB_SPEED_SUPER; + if (mod_data.is_super_speed) + dum->gadget.max_speed = USB_SPEED_SUPER; + else if (mod_data.is_high_speed) + dum->gadget.max_speed = USB_SPEED_HIGH; + else + dum->gadget.max_speed = USB_SPEED_FULL; dum->gadget.dev.parent = &pdev->dev; init_dummy_udc_hw(dum); @@ -1239,6 +1273,8 @@ static int dummy_urb_enqueue( list_add_tail(&urbp->urbp_list, &dum_hcd->urbp_list); urb->hcpriv = urbp; + if (!dum_hcd->next_frame_urbp) + dum_hcd->next_frame_urbp = urbp; if (usb_pipetype(urb->pipe) == PIPE_CONTROL) urb->error_count = 1; /* mark as a new urb */ @@ -1515,6 +1551,8 @@ static struct dummy_ep *find_endpoint(struct dummy *dum, u8 address) if (!is_active((dum->gadget.speed == USB_SPEED_SUPER ? dum->ss_hcd : dum->hs_hcd))) return NULL; + if (!dum->ints_enabled) + return NULL; if ((address & ~USB_DIR_IN) == 0) return &dum->ep[0]; for (i = 1; i < DUMMY_ENDPOINTS; i++) { @@ -1756,6 +1794,7 @@ static void dummy_timer(unsigned long _dum_hcd) spin_unlock_irqrestore(&dum->lock, flags); return; } + dum_hcd->next_frame_urbp = NULL; for (i = 0; i < DUMMY_ENDPOINTS; i++) { if (!ep_info[i].name) @@ -1772,6 +1811,10 @@ restart: int type; int status = -EINPROGRESS; + /* stop when we reach URBs queued after the timer interrupt */ + if (urbp == dum_hcd->next_frame_urbp) + break; + urb = urbp->urb; if (urb->unlinked) goto return_urb; @@ -1851,10 +1894,12 @@ restart: * until setup() returns; no reentrancy issues etc. */ if (value > 0) { + ++dum->callback_usage; spin_unlock(&dum->lock); value = dum->driver->setup(&dum->gadget, &setup); spin_lock(&dum->lock); + --dum->callback_usage; if (value >= 0) { /* no delays (max 64KB data stage) */ @@ -2559,8 +2604,6 @@ static struct hc_driver dummy_hcd = { .product_desc = "Dummy host controller", .hcd_priv_size = sizeof(struct dummy_hcd), - .flags = HCD_USB3 | HCD_SHARED, - .reset = dummy_setup, .start = dummy_start, .stop = dummy_stop, @@ -2589,8 +2632,12 @@ static int dummy_hcd_probe(struct platform_device *pdev) dev_info(&pdev->dev, "%s, driver " DRIVER_VERSION "\n", driver_desc); dum = *((void **)dev_get_platdata(&pdev->dev)); - if (!mod_data.is_super_speed) + if (mod_data.is_super_speed) + dummy_hcd.flags = HCD_USB3 | HCD_SHARED; + else if (mod_data.is_high_speed) dummy_hcd.flags = HCD_USB2; + else + dummy_hcd.flags = HCD_USB11; hs_hcd = usb_create_hcd(&dummy_hcd, &pdev->dev, dev_name(&pdev->dev)); if (!hs_hcd) return -ENOMEM; diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index d2cfefadca3c..bb89e24c48b4 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -879,7 +879,7 @@ static int usb3_write_pipe(struct renesas_usb3_ep *usb3_ep, usb3_ep->ep.maxpacket); u8 *buf = usb3_req->req.buf + usb3_req->req.actual; u32 tmp = 0; - bool is_last; + bool is_last = !len ? true : false; if (usb3_wait_pipe_status(usb3_ep, PX_STA_BUFSTS) < 0) return -EBUSY; @@ -900,7 +900,8 @@ static int usb3_write_pipe(struct renesas_usb3_ep *usb3_ep, usb3_write(usb3, tmp, fifo_reg); } - is_last = usb3_is_transfer_complete(usb3_ep, usb3_req); + if (!is_last) + is_last = usb3_is_transfer_complete(usb3_ep, usb3_req); /* Send the data */ usb3_set_px_con_send(usb3_ep, len, is_last); @@ -991,7 +992,8 @@ static void usb3_start_pipe0(struct renesas_usb3_ep *usb3_ep, usb3_set_p0_con_for_ctrl_read_data(usb3); } else { usb3_clear_bit(usb3, P0_MOD_DIR, USB3_P0_MOD); - usb3_set_p0_con_for_ctrl_write_data(usb3); + if (usb3_req->req.length) + usb3_set_p0_con_for_ctrl_write_data(usb3); } usb3_p0_xfer(usb3_ep, usb3_req); @@ -1568,7 +1570,16 @@ static u32 usb3_calc_ramarea(int ram_size) static u32 usb3_calc_rammap_val(struct renesas_usb3_ep *usb3_ep, const struct usb_endpoint_descriptor *desc) { - return usb3_ep->rammap_val | PN_RAMMAP_MPKT(usb_endpoint_maxp(desc)); + int i; + const u32 max_packet_array[] = {8, 16, 32, 64, 512}; + u32 mpkt = PN_RAMMAP_MPKT(1024); + + for (i = 0; i < ARRAY_SIZE(max_packet_array); i++) { + if (usb_endpoint_maxp(desc) <= max_packet_array[i]) + mpkt = PN_RAMMAP_MPKT(max_packet_array[i]); + } + + return usb3_ep->rammap_val | mpkt; } static int usb3_enable_pipe_n(struct renesas_usb3_ep *usb3_ep, diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig index 0b80cee30da4..eb121b2a55d4 100644 --- a/drivers/usb/host/Kconfig +++ b/drivers/usb/host/Kconfig @@ -45,9 +45,9 @@ config USB_XHCI_PLATFORM If unsure, say N. config USB_XHCI_MTK - tristate "xHCI support for Mediatek MT65xx" + tristate "xHCI support for Mediatek MT65xx/MT7621" select MFD_SYSCON - depends on ARCH_MEDIATEK || COMPILE_TEST + depends on (MIPS && SOC_MT7621) || ARCH_MEDIATEK || COMPILE_TEST ---help--- Say 'Y' to enable the support for the xHCI host controller found in Mediatek MT65xx SoCs. diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index 58b9685eb21f..ee213c5f4107 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -447,7 +447,7 @@ static int usb_asmedia_wait_write(struct pci_dev *pdev) if ((value & ASMT_CONTROL_WRITE_BIT) == 0) return 0; - usleep_range(40, 60); + udelay(50); } dev_warn(&pdev->dev, "%s: check_write_ready timeout", __func__); @@ -1022,7 +1022,7 @@ EXPORT_SYMBOL_GPL(usb_disable_xhci_ports); * * Takes care of the handoff between the Pre-OS (i.e. BIOS) and the OS. * It signals to the BIOS that the OS wants control of the host controller, - * and then waits 5 seconds for the BIOS to hand over control. + * and then waits 1 second for the BIOS to hand over control. * If we timeout, assume the BIOS is broken and take control anyway. */ static void quirk_usb_handoff_xhci(struct pci_dev *pdev) @@ -1069,9 +1069,9 @@ static void quirk_usb_handoff_xhci(struct pci_dev *pdev) if (val & XHCI_HC_BIOS_OWNED) { writel(val | XHCI_HC_OS_OWNED, base + ext_cap_offset); - /* Wait for 5 seconds with 10 microsecond polling interval */ + /* Wait for 1 second with 10 microsecond polling interval */ timeout = handshake(base + ext_cap_offset, XHCI_HC_BIOS_OWNED, - 0, 5000, 10); + 0, 1000000, 10); /* Assume a buggy BIOS and take HC ownership anyway */ if (timeout) { @@ -1100,7 +1100,7 @@ hc_init: * operational or runtime registers. Wait 5 seconds and no more. */ timeout = handshake(op_reg_base + XHCI_STS_OFFSET, XHCI_STS_CNR, 0, - 5000, 10); + 5000000, 10); /* Assume a buggy HC and start HC initialization anyway */ if (timeout) { val = readl(op_reg_base + XHCI_STS_OFFSET); diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 36b7789f8f22..4a02c5c7df0d 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -112,7 +112,7 @@ static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf, /* If PSI table exists, add the custom speed attributes from it */ if (usb3_1 && xhci->usb3_rhub.psi_count) { - u32 ssp_cap_base, bm_attrib, psi; + u32 ssp_cap_base, bm_attrib, psi, psi_mant, psi_exp; int offset; ssp_cap_base = USB_DT_BOS_SIZE + USB_DT_USB_SS_CAP_SIZE; @@ -139,6 +139,15 @@ static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf, for (i = 0; i < xhci->usb3_rhub.psi_count; i++) { psi = xhci->usb3_rhub.psi[i]; psi &= ~USB_SSP_SUBLINK_SPEED_RSVD; + psi_exp = XHCI_EXT_PORT_PSIE(psi); + psi_mant = XHCI_EXT_PORT_PSIM(psi); + + /* Shift to Gbps and set SSP Link BIT(14) if 10Gpbs */ + for (; psi_exp < 3; psi_exp++) + psi_mant /= 1000; + if (psi_mant >= 10) + psi |= BIT(14); + if ((psi & PLT_MASK) == PLT_SYM) { /* Symmetric, create SSA RX and TX from one PSI entry */ put_unaligned_le32(psi, &buf[offset]); @@ -1351,9 +1360,6 @@ int xhci_bus_suspend(struct usb_hcd *hcd) t2 |= PORT_WKOC_E | PORT_WKCONN_E; t2 &= ~PORT_WKDISC_E; } - if ((xhci->quirks & XHCI_U2_DISABLE_WAKE) && - (hcd->speed < HCD_USB3)) - t2 &= ~PORT_WAKE_BITS; } else t2 &= ~PORT_WAKE_BITS; diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 23833448e602..c87ef38e7416 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -54,11 +54,6 @@ #define PCI_DEVICE_ID_INTEL_APL_XHCI 0x5aa8 #define PCI_DEVICE_ID_INTEL_DNV_XHCI 0x19d0 -#define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 -#define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba -#define PCI_DEVICE_ID_AMD_PROMONTORYA_2 0x43bb -#define PCI_DEVICE_ID_AMD_PROMONTORYA_1 0x43bc - #define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI 0x1142 static const char hcd_name[] = "xhci_hcd"; @@ -142,13 +137,6 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) if (pdev->vendor == PCI_VENDOR_ID_AMD) xhci->quirks |= XHCI_TRUST_TX_LENGTH; - if ((pdev->vendor == PCI_VENDOR_ID_AMD) && - ((pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_4) || - (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_3) || - (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_2) || - (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_1))) - xhci->quirks |= XHCI_U2_DISABLE_WAKE; - if (pdev->vendor == PCI_VENDOR_ID_INTEL) { xhci->quirks |= XHCI_LPM_SUPPORT; xhci->quirks |= XHCI_INTEL_HOST; diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 82308af5801b..a7d239f5fc5f 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -4855,7 +4855,8 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) */ hcd->has_tt = 1; } else { - if (xhci->sbrn == 0x31) { + /* Some 3.1 hosts return sbrn 0x30, can't rely on sbrn alone */ + if (xhci->sbrn == 0x31 || xhci->usb3_rhub.min_rev >= 1) { xhci_info(xhci, "Host supports USB 3.1 Enhanced SuperSpeed\n"); hcd->speed = HCD_USB31; hcd->self.root_hub->speed = USB_SPEED_SUPER_PLUS; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index a0f4a9feb058..836398ade58d 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1509,7 +1509,7 @@ struct xhci_bus_state { static inline unsigned int hcd_index(struct usb_hcd *hcd) { - if (hcd->speed == HCD_USB3) + if (hcd->speed >= HCD_USB3) return 0; else return 1; @@ -1660,7 +1660,7 @@ struct xhci_hcd { /* For controller with a broken Port Disable implementation */ #define XHCI_BROKEN_PORT_PED (1 << 25) #define XHCI_LIMIT_ENDPOINT_INTERVAL_7 (1 << 26) -#define XHCI_U2_DISABLE_WAKE (1 << 27) +/* Reserved. It was XHCI_U2_DISABLE_WAKE */ #define XHCI_ASMEDIA_MODIFY_FLOWCONTROL (1 << 28) unsigned int num_active_eps; diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index a6b6b1cf1317..aac28d998073 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -890,7 +890,7 @@ b_host: */ if (int_usb & MUSB_INTR_RESET) { handled = IRQ_HANDLED; - if (devctl & MUSB_DEVCTL_HM) { + if (is_host_active(musb)) { /* * When BABBLE happens what we can depends on which * platform MUSB is running, because some platforms @@ -900,9 +900,7 @@ b_host: * drop the session. */ dev_err(musb->controller, "Babble\n"); - - if (is_host_active(musb)) - musb_recover_from_babble(musb); + musb_recover_from_babble(musb); } else { musb_dbg(musb, "BUS RESET as %s", usb_otg_state_string(musb->xceiv->otg->state)); diff --git a/drivers/usb/musb/sunxi.c b/drivers/usb/musb/sunxi.c index 1408245be18e..3e1f3daa42f5 100644 --- a/drivers/usb/musb/sunxi.c +++ b/drivers/usb/musb/sunxi.c @@ -313,6 +313,8 @@ static int sunxi_musb_exit(struct musb *musb) if (test_bit(SUNXI_MUSB_FL_HAS_SRAM, &glue->flags)) sunxi_sram_release(musb->controller->parent); + devm_usb_put_phy(glue->dev, glue->xceiv); + return 0; } diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index 857e78337324..6c6a3a8df07a 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -285,11 +285,26 @@ static void usbhsf_fifo_clear(struct usbhs_pipe *pipe, struct usbhs_fifo *fifo) { struct usbhs_priv *priv = usbhs_pipe_to_priv(pipe); + int ret = 0; - if (!usbhs_pipe_is_dcp(pipe)) - usbhsf_fifo_barrier(priv, fifo); + if (!usbhs_pipe_is_dcp(pipe)) { + /* + * This driver checks the pipe condition first to avoid -EBUSY + * from usbhsf_fifo_barrier() with about 10 msec delay in + * the interrupt handler if the pipe is RX direction and empty. + */ + if (usbhs_pipe_is_dir_in(pipe)) + ret = usbhs_pipe_is_accessible(pipe); + if (!ret) + ret = usbhsf_fifo_barrier(priv, fifo); + } - usbhs_write(priv, fifo->ctr, BCLR); + /* + * if non-DCP pipe, this driver should set BCLR when + * usbhsf_fifo_barrier() returns 0. + */ + if (!ret) + usbhs_write(priv, fifo->ctr, BCLR); } static int usbhsf_fifo_rcv_len(struct usbhs_priv *priv, @@ -845,9 +860,9 @@ static void xfer_work(struct work_struct *work) fifo->name, usbhs_pipe_number(pipe), pkt->length, pkt->zero); usbhs_pipe_running(pipe, 1); - usbhsf_dma_start(pipe, fifo); usbhs_pipe_set_trans_count_if_bulk(pipe, pkt->trans); dma_async_issue_pending(chan); + usbhsf_dma_start(pipe, fifo); usbhs_pipe_enable(pipe); xfer_work_end: diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c index b6f1adefb758..76062ce2d459 100644 --- a/drivers/usb/serial/console.c +++ b/drivers/usb/serial/console.c @@ -186,6 +186,7 @@ static int usb_console_setup(struct console *co, char *options) tty_kref_put(tty); reset_open_count: port->port.count = 0; + info->port = NULL; usb_autopm_put_interface(serial->interface); error_get_interface: usb_serial_put(serial); diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 470b17b0c11b..11ee55e080e5 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -171,6 +171,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ { USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */ + { USB_DEVICE(0x18EF, 0xE032) }, /* ELV TFD500 Data Logger */ { USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */ { USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */ { USB_DEVICE(0x1901, 0x0194) }, /* GE Healthcare Remote Alarm Box */ diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 19394963f675..3249f42b4b93 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1015,6 +1015,8 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(WICED_VID, WICED_USB20706V2_PID) }, { USB_DEVICE(TI_VID, TI_CC3200_LAUNCHPAD_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, + { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_BT_USB_PID) }, + { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_WL_USB_PID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 4fcf1cecb6d7..f9d15bd62785 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -609,6 +609,13 @@ #define ADI_GNICE_PID 0xF000 #define ADI_GNICEPLUS_PID 0xF001 +/* + * Cypress WICED USB UART + */ +#define CYPRESS_VID 0x04B4 +#define CYPRESS_WICED_BT_USB_PID 0x009B +#define CYPRESS_WICED_WL_USB_PID 0xF900 + /* * Microchip Technology, Inc. * diff --git a/drivers/usb/serial/metro-usb.c b/drivers/usb/serial/metro-usb.c index 39e683096e94..45182c65fa1f 100644 --- a/drivers/usb/serial/metro-usb.c +++ b/drivers/usb/serial/metro-usb.c @@ -45,6 +45,7 @@ struct metrousb_private { static const struct usb_device_id id_table[] = { { USB_DEVICE(FOCUS_VENDOR_ID, FOCUS_PRODUCT_ID_BI) }, { USB_DEVICE(FOCUS_VENDOR_ID, FOCUS_PRODUCT_ID_UNI) }, + { USB_DEVICE_INTERFACE_CLASS(0x0c2e, 0x0730, 0xff) }, /* MS7820 */ { }, /* Terminating entry. */ }; MODULE_DEVICE_TABLE(usb, id_table); diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index 136ff5e1b7c1..135eb04368f9 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -234,11 +234,16 @@ static int read_mos_reg(struct usb_serial *serial, unsigned int serial_portnum, status = usb_control_msg(usbdev, pipe, request, requesttype, value, index, buf, 1, MOS_WDR_TIMEOUT); - if (status == 1) + if (status == 1) { *data = *buf; - else if (status < 0) + } else { dev_err(&usbdev->dev, "mos7720: usb_control_msg() failed: %d\n", status); + if (status >= 0) + status = -EIO; + *data = 0; + } + kfree(buf); return status; diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 5c4fc3abf6a7..6baacf64b21e 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -285,9 +285,15 @@ static int mos7840_get_reg_sync(struct usb_serial_port *port, __u16 reg, ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), MCS_RDREQ, MCS_RD_RTYPE, 0, reg, buf, VENDOR_READ_LENGTH, MOS_WDR_TIMEOUT); + if (ret < VENDOR_READ_LENGTH) { + if (ret >= 0) + ret = -EIO; + goto out; + } + *val = buf[0]; dev_dbg(&port->dev, "%s offset is %x, return val %x\n", __func__, reg, *val); - +out: kfree(buf); return ret; } @@ -353,8 +359,13 @@ static int mos7840_get_uart_reg(struct usb_serial_port *port, __u16 reg, ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), MCS_RDREQ, MCS_RD_RTYPE, Wval, reg, buf, VENDOR_READ_LENGTH, MOS_WDR_TIMEOUT); + if (ret < VENDOR_READ_LENGTH) { + if (ret >= 0) + ret = -EIO; + goto out; + } *val = buf[0]; - +out: kfree(buf); return ret; } @@ -1490,10 +1501,10 @@ static int mos7840_tiocmget(struct tty_struct *tty) return -ENODEV; status = mos7840_get_uart_reg(port, MODEM_STATUS_REGISTER, &msr); - if (status != 1) + if (status < 0) return -EIO; status = mos7840_get_uart_reg(port, MODEM_CONTROL_REGISTER, &mcr); - if (status != 1) + if (status < 0) return -EIO; result = ((mcr & MCR_DTR) ? TIOCM_DTR : 0) | ((mcr & MCR_RTS) ? TIOCM_RTS : 0) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 2a9944326210..db3d34c2c82e 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -522,6 +522,7 @@ static void option_instat_callback(struct urb *urb); /* TP-LINK Incorporated products */ #define TPLINK_VENDOR_ID 0x2357 +#define TPLINK_PRODUCT_LTE 0x000D #define TPLINK_PRODUCT_MA180 0x0201 /* Changhong products */ @@ -2011,6 +2012,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(CELLIENT_VENDOR_ID, CELLIENT_PRODUCT_MEN200) }, { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T_600A) }, { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T_600E) }, + { USB_DEVICE_AND_INTERFACE_INFO(TPLINK_VENDOR_ID, TPLINK_PRODUCT_LTE, 0xff, 0x00, 0x00) }, /* TP-Link LTE Module */ { USB_DEVICE(TPLINK_VENDOR_ID, TPLINK_PRODUCT_MA180), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(TPLINK_VENDOR_ID, 0x9000), /* TP-Link MA260 */ diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 652b4334b26d..e1c1e329c877 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -174,6 +174,10 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x413c, 0x81b3)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */ {DEVICE_SWI(0x413c, 0x81b5)}, /* Dell Wireless 5811e QDL */ {DEVICE_SWI(0x413c, 0x81b6)}, /* Dell Wireless 5811e QDL */ + {DEVICE_SWI(0x413c, 0x81cf)}, /* Dell Wireless 5819 */ + {DEVICE_SWI(0x413c, 0x81d0)}, /* Dell Wireless 5819 */ + {DEVICE_SWI(0x413c, 0x81d1)}, /* Dell Wireless 5818 */ + {DEVICE_SWI(0x413c, 0x81d2)}, /* Dell Wireless 5818 */ /* Huawei devices */ {DEVICE_HWI(0x03f0, 0x581d)}, /* HP lt4112 LTE/HSPA+ Gobi 4G Modem (Huawei me906e) */ diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c index 1a59f335b063..a3ccb899df60 100644 --- a/drivers/usb/storage/transport.c +++ b/drivers/usb/storage/transport.c @@ -834,13 +834,25 @@ Retry_Sense: if (result == USB_STOR_TRANSPORT_GOOD) { srb->result = SAM_STAT_GOOD; srb->sense_buffer[0] = 0x0; + } + + /* + * ATA-passthru commands use sense data to report + * the command completion status, and often devices + * return Check Condition status when nothing is + * wrong. + */ + else if (srb->cmnd[0] == ATA_16 || + srb->cmnd[0] == ATA_12) { + /* leave the data alone */ + } /* * If there was a problem, report an unspecified * hardware error to prevent the higher layers from * entering an infinite retry loop. */ - } else { + else { srb->result = DID_ERROR << 16; if ((sshdr.response_code & 0x72) == 0x72) srb->sense_buffer[1] = HARDWARE_ERROR; diff --git a/drivers/usb/storage/uas-detect.h b/drivers/usb/storage/uas-detect.h index f58caa9e6a27..a155cd02bce2 100644 --- a/drivers/usb/storage/uas-detect.h +++ b/drivers/usb/storage/uas-detect.h @@ -9,7 +9,8 @@ static int uas_is_interface(struct usb_host_interface *intf) intf->desc.bInterfaceProtocol == USB_PR_UAS); } -static int uas_find_uas_alt_setting(struct usb_interface *intf) +static struct usb_host_interface *uas_find_uas_alt_setting( + struct usb_interface *intf) { int i; @@ -17,10 +18,10 @@ static int uas_find_uas_alt_setting(struct usb_interface *intf) struct usb_host_interface *alt = &intf->altsetting[i]; if (uas_is_interface(alt)) - return alt->desc.bAlternateSetting; + return alt; } - return -ENODEV; + return NULL; } static int uas_find_endpoints(struct usb_host_interface *alt, @@ -58,14 +59,14 @@ static int uas_use_uas_driver(struct usb_interface *intf, struct usb_device *udev = interface_to_usbdev(intf); struct usb_hcd *hcd = bus_to_hcd(udev->bus); unsigned long flags = id->driver_info; - int r, alt; - + struct usb_host_interface *alt; + int r; alt = uas_find_uas_alt_setting(intf); - if (alt < 0) + if (!alt) return 0; - r = uas_find_endpoints(&intf->altsetting[alt], eps); + r = uas_find_endpoints(alt, eps); if (r < 0) return 0; diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 5ef014ba6ae8..9876af4ab64e 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -873,14 +873,14 @@ MODULE_DEVICE_TABLE(usb, uas_usb_ids); static int uas_switch_interface(struct usb_device *udev, struct usb_interface *intf) { - int alt; + struct usb_host_interface *alt; alt = uas_find_uas_alt_setting(intf); - if (alt < 0) - return alt; + if (!alt) + return -ENODEV; - return usb_set_interface(udev, - intf->altsetting[0].desc.bInterfaceNumber, alt); + return usb_set_interface(udev, alt->desc.bInterfaceNumber, + alt->desc.bAlternateSetting); } static int uas_configure_endpoints(struct uas_dev_info *devinfo) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 9129f6cb8230..2572fd5cd2bb 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1459,6 +1459,13 @@ UNUSUAL_DEV( 0x0bc2, 0x3010, 0x0000, 0x0000, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_SANE_SENSE ), +/* Reported by Kris Lindgren */ +UNUSUAL_DEV( 0x0bc2, 0x3332, 0x0000, 0x9999, + "Seagate", + "External", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_WP_DETECT ), + UNUSUAL_DEV( 0x0d49, 0x7310, 0x0000, 0x9999, "Maxtor", "USB to SATA", diff --git a/drivers/uwb/hwa-rc.c b/drivers/uwb/hwa-rc.c index 35a1e777b449..9a53912bdfe9 100644 --- a/drivers/uwb/hwa-rc.c +++ b/drivers/uwb/hwa-rc.c @@ -825,6 +825,8 @@ static int hwarc_probe(struct usb_interface *iface, if (iface->cur_altsetting->desc.bNumEndpoints < 1) return -ENODEV; + if (!usb_endpoint_xfer_int(&iface->cur_altsetting->endpoint[0].desc)) + return -ENODEV; result = -ENOMEM; uwb_rc = uwb_rc_alloc(); diff --git a/drivers/uwb/uwbd.c b/drivers/uwb/uwbd.c index 01c20a260a8b..39dd4ef53c77 100644 --- a/drivers/uwb/uwbd.c +++ b/drivers/uwb/uwbd.c @@ -302,18 +302,22 @@ static int uwbd(void *param) /** Start the UWB daemon */ void uwbd_start(struct uwb_rc *rc) { - rc->uwbd.task = kthread_run(uwbd, rc, "uwbd"); - if (rc->uwbd.task == NULL) + struct task_struct *task = kthread_run(uwbd, rc, "uwbd"); + if (IS_ERR(task)) { + rc->uwbd.task = NULL; printk(KERN_ERR "UWB: Cannot start management daemon; " "UWB won't work\n"); - else + } else { + rc->uwbd.task = task; rc->uwbd.pid = rc->uwbd.task->pid; + } } /* Stop the UWB daemon and free any unprocessed events */ void uwbd_stop(struct uwb_rc *rc) { - kthread_stop(rc->uwbd.task); + if (rc->uwbd.task) + kthread_stop(rc->uwbd.task); uwbd_flush(rc); } diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c index 11026e726b68..81367cf0af77 100644 --- a/drivers/video/fbdev/aty/atyfb_base.c +++ b/drivers/video/fbdev/aty/atyfb_base.c @@ -1861,7 +1861,7 @@ static int atyfb_ioctl(struct fb_info *info, u_int cmd, u_long arg) #if defined(DEBUG) && defined(CONFIG_FB_ATY_CT) case ATYIO_CLKR: if (M64_HAS(INTEGRATED)) { - struct atyclk clk; + struct atyclk clk = { 0 }; union aty_pll *pll = &par->pll; u32 dsp_config = pll->ct.dsp_config; u32 dsp_on_off = pll->ct.dsp_on_off; diff --git a/drivers/watchdog/kempld_wdt.c b/drivers/watchdog/kempld_wdt.c index 8e302d0e346c..3efa295ac627 100644 --- a/drivers/watchdog/kempld_wdt.c +++ b/drivers/watchdog/kempld_wdt.c @@ -140,12 +140,19 @@ static int kempld_wdt_set_stage_timeout(struct kempld_wdt_data *wdt_data, unsigned int timeout) { struct kempld_device_data *pld = wdt_data->pld; - u32 prescaler = kempld_prescaler[PRESCALER_21]; + u32 prescaler; u64 stage_timeout64; u32 stage_timeout; u32 remainder; u8 stage_cfg; +#if GCC_VERSION < 40400 + /* work around a bug compiling do_div() */ + prescaler = READ_ONCE(kempld_prescaler[PRESCALER_21]); +#else + prescaler = kempld_prescaler[PRESCALER_21]; +#endif + if (!stage) return -EINVAL; diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 679f79f68182..b68ced5a6331 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -680,3 +680,22 @@ xen_swiotlb_set_dma_mask(struct device *dev, u64 dma_mask) return 0; } EXPORT_SYMBOL_GPL(xen_swiotlb_set_dma_mask); + +/* + * Create userspace mapping for the DMA-coherent memory. + * This function should be called with the pages from the current domain only, + * passing pages mapped from other domains would lead to memory corruption. + */ +int +xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) + if (__generic_dma_ops(dev)->mmap) + return __generic_dma_ops(dev)->mmap(dev, vma, cpu_addr, + dma_addr, size, attrs); +#endif + return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); +} +EXPORT_SYMBOL_GPL(xen_swiotlb_dma_mmap); diff --git a/fs/block_dev.c b/fs/block_dev.c index 07e46b786500..cb936c90ae82 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -450,10 +450,12 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, set_page_writeback(page); result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, true); - if (result) + if (result) { end_page_writeback(page); - else + } else { + clean_page_buffers(page); unlock_page(page); + } blk_queue_exit(bdev->bd_queue); return result; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8a05fa7e2152..f089d7d8afe7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8050,8 +8050,10 @@ next_block_or_try_again: start += sectorsize; - if (nr_sectors--) { + nr_sectors--; + if (nr_sectors) { pgoff += sectorsize; + ASSERT(pgoff < PAGE_SIZE); goto next_block_or_try_again; } } @@ -8157,8 +8159,10 @@ next: ASSERT(nr_sectors); - if (--nr_sectors) { + nr_sectors--; + if (nr_sectors) { pgoff += sectorsize; + ASSERT(pgoff < PAGE_SIZE); goto next_block; } } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 1782804f6c26..0fe346c4bd28 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3052,7 +3052,7 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff, out: if (ret) btrfs_cmp_data_free(cmp); - return 0; + return ret; } static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, @@ -4082,6 +4082,10 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) ret = PTR_ERR(new_root); goto out; } + if (!is_fstree(new_root->objectid)) { + ret = -ENOENT; + goto out; + } path = btrfs_alloc_path(); if (!path) { diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 2cf5e142675e..04c61bcf62e5 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2367,11 +2367,11 @@ void free_reloc_roots(struct list_head *list) while (!list_empty(list)) { reloc_root = list_entry(list->next, struct btrfs_root, root_list); + __del_reloc_root(reloc_root); free_extent_buffer(reloc_root->node); free_extent_buffer(reloc_root->commit_root); reloc_root->node = NULL; reloc_root->commit_root = NULL; - __del_reloc_root(reloc_root); } } diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 71261b459863..77f9efc1f7aa 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1680,6 +1680,9 @@ static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen) { int ret; + if (ino == BTRFS_FIRST_FREE_OBJECTID) + return 1; + ret = get_cur_inode_state(sctx, ino, gen); if (ret < 0) goto out; @@ -1865,7 +1868,7 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, * not deleted and then re-created, if it was then we have no overwrite * and we can just unlink this entry. */ - if (sctx->parent_root) { + if (sctx->parent_root && dir != BTRFS_FIRST_FREE_OBJECTID) { ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, NULL, NULL, NULL); if (ret < 0 && ret != -ENOENT) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 71a60cc01451..06a77e47957d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6226,7 +6226,7 @@ int btrfs_map_bio(struct btrfs_root *root, struct bio *bio, for (dev_nr = 0; dev_nr < total_devs; dev_nr++) { dev = bbio->stripes[dev_nr].dev; if (!dev || !dev->bdev || - (bio_op(bio) == REQ_OP_WRITE && !dev->writeable)) { + (bio_op(first_bio) == REQ_OP_WRITE && !dev->writeable)) { bbio_error(bbio, first_bio, logical); continue; } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 953275b651bc..4a6df2ce0f76 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1323,8 +1323,8 @@ retry_lookup: ceph_dir_clear_ordered(dir); dout("d_delete %p\n", dn); d_delete(dn); - } else { - if (have_lease && d_unhashed(dn)) + } else if (have_lease) { + if (d_unhashed(dn)) d_add(dn, NULL); update_dentry_lease(dn, rinfo->dlease, session, diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 7d752d53353a..4c9c72f26eb9 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -25,7 +25,7 @@ static long ceph_ioctl_get_layout(struct file *file, void __user *arg) l.stripe_count = ci->i_layout.stripe_count; l.object_size = ci->i_layout.object_size; l.data_pool = ci->i_layout.pool_id; - l.preferred_osd = (s32)-1; + l.preferred_osd = -1; if (copy_to_user(arg, &l, sizeof(l))) return -EFAULT; } @@ -97,7 +97,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) nl.data_pool = ci->i_layout.pool_id; /* this is obsolete, and always -1 */ - nl.preferred_osd = le64_to_cpu(-1); + nl.preferred_osd = -1; err = __validate_layout(mdsc, &nl); if (err) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index e3e1a80b351e..c0f52c443c34 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1782,13 +1782,18 @@ static int build_dentry_path(struct dentry *dentry, int *pfreepath) { char *path; + struct inode *dir; - if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_NOSNAP) { - *pino = ceph_ino(d_inode(dentry->d_parent)); + rcu_read_lock(); + dir = d_inode_rcu(dentry->d_parent); + if (dir && ceph_snap(dir) == CEPH_NOSNAP) { + *pino = ceph_ino(dir); + rcu_read_unlock(); *ppath = dentry->d_name.name; *ppathlen = dentry->d_name.len; return 0; } + rcu_read_unlock(); path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1); if (IS_ERR(path)) return PTR_ERR(path); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index c0c253005b76..87658f63b374 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1360,7 +1360,7 @@ exit_cifs(void) exit_cifs_idmap(); #endif #ifdef CONFIG_CIFS_UPCALL - unregister_key_type(&cifs_spnego_key_type); + exit_cifs_spnego(); #endif cifs_destroy_request_bufs(); cifs_destroy_mids(); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 48ef401c3c61..7b496a4e650e 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -365,6 +365,8 @@ struct smb_version_operations { unsigned int (*calc_smb_size)(void *); /* check for STATUS_PENDING and process it in a positive case */ bool (*is_status_pending)(char *, struct TCP_Server_Info *, int); + /* check for STATUS_NETWORK_SESSION_EXPIRED */ + bool (*is_session_expired)(char *); /* send oplock break response */ int (*oplock_response)(struct cifs_tcon *, struct cifs_fid *, struct cifsInodeInfo *); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 1f91c9dadd5b..cc420d6b71f7 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1457,6 +1457,13 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) return length; server->total_read += length; + if (server->ops->is_session_expired && + server->ops->is_session_expired(buf)) { + cifs_reconnect(server); + wake_up(&server->response_q); + return -1; + } + if (server->ops->is_status_pending && server->ops->is_status_pending(buf, server, 0)) { discard_remaining_data(server); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 1a545695f547..7710dd90ca66 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -796,6 +796,13 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid) cifs_dump_mem("Bad SMB: ", buf, min_t(unsigned int, server->total_read, 48)); + if (server->ops->is_session_expired && + server->ops->is_session_expired(buf)) { + cifs_reconnect(server); + wake_up(&server->response_q); + return -1; + } + if (server->ops->is_status_pending && server->ops->is_status_pending(buf, server, length)) return -1; @@ -2432,7 +2439,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) } down_read(&key->sem); - upayload = user_key_payload(key); + upayload = user_key_payload_locked(key); if (IS_ERR_OR_NULL(upayload)) { rc = upayload ? PTR_ERR(upayload) : -EINVAL; goto out_key_put; @@ -4071,6 +4078,14 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, cifs_dbg(FYI, "Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d\n", server->sec_mode, server->capabilities, server->timeAdj); + if (ses->auth_key.response) { + cifs_dbg(VFS, "Free previous auth_key.response = %p\n", + ses->auth_key.response); + kfree(ses->auth_key.response); + ses->auth_key.response = NULL; + ses->auth_key.len = 0; + } + if (server->ops->sess_setup) rc = server->ops->sess_setup(xid, ses, nls_info); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 3925758f6dde..cf192f9ce254 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -224,6 +224,13 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, if (backup_cred(cifs_sb)) create_options |= CREATE_OPEN_BACKUP_INTENT; + /* O_SYNC also has bit for O_DSYNC so following check picks up either */ + if (f_flags & O_SYNC) + create_options |= CREATE_WRITE_THROUGH; + + if (f_flags & O_DIRECT) + create_options |= CREATE_NO_BUFFER; + oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = desired_access; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index b6968241c26f..812e4884c392 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1018,6 +1018,18 @@ smb2_is_status_pending(char *buf, struct TCP_Server_Info *server, int length) return true; } +static bool +smb2_is_session_expired(char *buf) +{ + struct smb2_hdr *hdr = (struct smb2_hdr *)buf; + + if (hdr->Status != STATUS_NETWORK_SESSION_EXPIRED) + return false; + + cifs_dbg(FYI, "Session expired\n"); + return true; +} + static int smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid, struct cifsInodeInfo *cinode) @@ -1609,6 +1621,7 @@ struct smb_version_operations smb20_operations = { .close_dir = smb2_close_dir, .calc_smb_size = smb2_calc_size, .is_status_pending = smb2_is_status_pending, + .is_session_expired = smb2_is_session_expired, .oplock_response = smb2_oplock_response, .queryfs = smb2_queryfs, .mand_lock = smb2_mand_lock, @@ -1690,6 +1703,7 @@ struct smb_version_operations smb21_operations = { .close_dir = smb2_close_dir, .calc_smb_size = smb2_calc_size, .is_status_pending = smb2_is_status_pending, + .is_session_expired = smb2_is_session_expired, .oplock_response = smb2_oplock_response, .queryfs = smb2_queryfs, .mand_lock = smb2_mand_lock, @@ -1773,6 +1787,7 @@ struct smb_version_operations smb30_operations = { .close_dir = smb2_close_dir, .calc_smb_size = smb2_calc_size, .is_status_pending = smb2_is_status_pending, + .is_session_expired = smb2_is_session_expired, .oplock_response = smb2_oplock_response, .queryfs = smb2_queryfs, .mand_lock = smb2_mand_lock, @@ -1862,6 +1877,7 @@ struct smb_version_operations smb311_operations = { .close_dir = smb2_close_dir, .calc_smb_size = smb2_calc_size, .is_status_pending = smb2_is_status_pending, + .is_session_expired = smb2_is_session_expired, .oplock_response = smb2_oplock_response, .queryfs = smb2_queryfs, .mand_lock = smb2_mand_lock, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 0437e5fdba56..69b610ad3fdc 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -366,7 +366,7 @@ assemble_neg_contexts(struct smb2_negotiate_req *req) build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt); req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT); req->NegotiateContextCount = cpu_to_le16(2); - inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context) + 2 + inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context) + sizeof(struct smb2_encryption_neg_context)); /* calculate hash */ } #else @@ -531,15 +531,22 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) /* * validation ioctl must be signed, so no point sending this if we - * can not sign it. We could eventually change this to selectively + * can not sign it (ie are not known user). Even if signing is not + * required (enabled but not negotiated), in those cases we selectively * sign just this, the first and only signed request on a connection. - * This is good enough for now since a user who wants better security - * would also enable signing on the mount. Having validation of - * negotiate info for signed connections helps reduce attack vectors + * Having validation of negotiate info helps reduce attack vectors. */ - if (tcon->ses->server->sign == false) + if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST) return 0; /* validation requires signing */ + if (tcon->ses->user_name == NULL) { + cifs_dbg(FYI, "Can't validate negotiate: null user mount\n"); + return 0; /* validation requires signing */ + } + + if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_NULL) + cifs_dbg(VFS, "Unexpected null user (anonymous) auth flag sent by server\n"); + vneg_inbuf.Capabilities = cpu_to_le32(tcon->ses->server->vals->req_capabilities); memcpy(vneg_inbuf.Guid, tcon->ses->server->client_guid, @@ -1010,6 +1017,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, while (sess_data->func) sess_data->func(sess_data); + if ((ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST) && (ses->sign)) + cifs_dbg(VFS, "signing requested but authenticated as guest\n"); rc = sess_data->result; out: kfree(sess_data); diff --git a/fs/crypto/Kconfig b/fs/crypto/Kconfig index 92348faf9865..08b46e6e3995 100644 --- a/fs/crypto/Kconfig +++ b/fs/crypto/Kconfig @@ -1,6 +1,5 @@ config FS_ENCRYPTION tristate "FS Encryption (Per-file encryption)" - depends on BLOCK select CRYPTO select CRYPTO_AES select CRYPTO_CBC @@ -8,9 +7,7 @@ config FS_ENCRYPTION select CRYPTO_XTS select CRYPTO_CTS select CRYPTO_CTR - select CRYPTO_SHA256 select KEYS - select ENCRYPTED_KEYS help Enable encryption of files and directories. This feature is similar to ecryptfs, but it is more memory diff --git a/fs/crypto/Makefile b/fs/crypto/Makefile index f17684c48739..9f6607f17b53 100644 --- a/fs/crypto/Makefile +++ b/fs/crypto/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_FS_ENCRYPTION) += fscrypto.o fscrypto-y := crypto.o fname.o policy.o keyinfo.o +fscrypto-$(CONFIG_BLOCK) += bio.o diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c new file mode 100644 index 000000000000..2596c9ad6d77 --- /dev/null +++ b/fs/crypto/bio.c @@ -0,0 +1,145 @@ +/* + * This contains encryption functions for per-file encryption. + * + * Copyright (C) 2015, Google, Inc. + * Copyright (C) 2015, Motorola Mobility + * + * Written by Michael Halcrow, 2014. + * + * Filename encryption additions + * Uday Savagaonkar, 2014 + * Encryption policy handling additions + * Ildar Muslukhov, 2014 + * Add fscrypt_pullback_bio_page() + * Jaegeuk Kim, 2015. + * + * This has not yet undergone a rigorous security audit. + * + * The usage of AES-XTS should conform to recommendations in NIST + * Special Publication 800-38E and IEEE P1619/D16. + */ + +#include +#include +#include +#include +#include "fscrypt_private.h" + +/* + * Call fscrypt_decrypt_page on every single page, reusing the encryption + * context. + */ +static void completion_pages(struct work_struct *work) +{ + struct fscrypt_ctx *ctx = + container_of(work, struct fscrypt_ctx, r.work); + struct bio *bio = ctx->r.bio; + struct bio_vec *bv; + int i; + + bio_for_each_segment_all(bv, bio, i) { + struct page *page = bv->bv_page; + int ret = fscrypt_decrypt_page(page->mapping->host, page, + PAGE_SIZE, 0, page->index); + + if (ret) { + WARN_ON_ONCE(1); + SetPageError(page); + } else { + SetPageUptodate(page); + } + unlock_page(page); + } + fscrypt_release_ctx(ctx); + bio_put(bio); +} + +void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx, struct bio *bio) +{ + INIT_WORK(&ctx->r.work, completion_pages); + ctx->r.bio = bio; + queue_work(fscrypt_read_workqueue, &ctx->r.work); +} +EXPORT_SYMBOL(fscrypt_decrypt_bio_pages); + +void fscrypt_pullback_bio_page(struct page **page, bool restore) +{ + struct fscrypt_ctx *ctx; + struct page *bounce_page; + + /* The bounce data pages are unmapped. */ + if ((*page)->mapping) + return; + + /* The bounce data page is unmapped. */ + bounce_page = *page; + ctx = (struct fscrypt_ctx *)page_private(bounce_page); + + /* restore control page */ + *page = ctx->w.control_page; + + if (restore) + fscrypt_restore_control_page(bounce_page); +} +EXPORT_SYMBOL(fscrypt_pullback_bio_page); + +int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, + sector_t pblk, unsigned int len) +{ + struct fscrypt_ctx *ctx; + struct page *ciphertext_page = NULL; + struct bio *bio; + int ret, err = 0; + + BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE); + + ctx = fscrypt_get_ctx(inode, GFP_NOFS); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ciphertext_page = fscrypt_alloc_bounce_page(ctx, GFP_NOWAIT); + if (IS_ERR(ciphertext_page)) { + err = PTR_ERR(ciphertext_page); + goto errout; + } + + while (len--) { + err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk, + ZERO_PAGE(0), ciphertext_page, + PAGE_SIZE, 0, GFP_NOFS); + if (err) + goto errout; + + bio = bio_alloc(GFP_NOWAIT, 1); + if (!bio) { + err = -ENOMEM; + goto errout; + } + bio->bi_bdev = inode->i_sb->s_bdev; + bio->bi_iter.bi_sector = + pblk << (inode->i_sb->s_blocksize_bits - 9); + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + ret = bio_add_page(bio, ciphertext_page, + inode->i_sb->s_blocksize, 0); + if (ret != inode->i_sb->s_blocksize) { + /* should never happen! */ + WARN_ON(1); + bio_put(bio); + err = -EIO; + goto errout; + } + err = submit_bio_wait(bio); + if (err == 0 && bio->bi_error) + err = -EIO; + bio_put(bio); + if (err) + goto errout; + lblk++; + pblk++; + } + err = 0; +errout: + fscrypt_release_ctx(ctx); + return err; +} +EXPORT_SYMBOL(fscrypt_zeroout_range); diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 61cfccea77bc..c7835df7e7b8 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -24,10 +24,10 @@ #include #include #include -#include #include #include -#include +#include +#include "fscrypt_private.h" static unsigned int num_prealloc_crypto_pages = 32; static unsigned int num_prealloc_crypto_ctxs = 128; @@ -44,7 +44,7 @@ static mempool_t *fscrypt_bounce_page_pool = NULL; static LIST_HEAD(fscrypt_free_ctxs); static DEFINE_SPINLOCK(fscrypt_ctx_lock); -static struct workqueue_struct *fscrypt_read_workqueue; +struct workqueue_struct *fscrypt_read_workqueue; static DEFINE_MUTEX(fscrypt_init_mutex); static struct kmem_cache *fscrypt_ctx_cachep; @@ -63,7 +63,7 @@ void fscrypt_release_ctx(struct fscrypt_ctx *ctx) { unsigned long flags; - if (ctx->flags & FS_WRITE_PATH_FL && ctx->w.bounce_page) { + if (ctx->flags & FS_CTX_HAS_BOUNCE_BUFFER_FL && ctx->w.bounce_page) { mempool_free(ctx->w.bounce_page, fscrypt_bounce_page_pool); ctx->w.bounce_page = NULL; } @@ -88,7 +88,7 @@ EXPORT_SYMBOL(fscrypt_release_ctx); * Return: An allocated and initialized encryption context on success; error * value or NULL otherwise. */ -struct fscrypt_ctx *fscrypt_get_ctx(struct inode *inode, gfp_t gfp_flags) +struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, gfp_t gfp_flags) { struct fscrypt_ctx *ctx = NULL; struct fscrypt_info *ci = inode->i_crypt_info; @@ -121,7 +121,7 @@ struct fscrypt_ctx *fscrypt_get_ctx(struct inode *inode, gfp_t gfp_flags) } else { ctx->flags &= ~FS_CTX_REQUIRES_FREE_ENCRYPT_FL; } - ctx->flags &= ~FS_WRITE_PATH_FL; + ctx->flags &= ~FS_CTX_HAS_BOUNCE_BUFFER_FL; return ctx; } EXPORT_SYMBOL(fscrypt_get_ctx); @@ -141,20 +141,15 @@ static void page_crypt_complete(struct crypto_async_request *req, int res) complete(&ecr->completion); } -typedef enum { - FS_DECRYPT = 0, - FS_ENCRYPT, -} fscrypt_direction_t; - -static int do_page_crypto(struct inode *inode, - fscrypt_direction_t rw, pgoff_t index, - struct page *src_page, struct page *dest_page, - gfp_t gfp_flags) +int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, + u64 lblk_num, struct page *src_page, + struct page *dest_page, unsigned int len, + unsigned int offs, gfp_t gfp_flags) { struct { __le64 index; - u8 padding[FS_XTS_TWEAK_SIZE - sizeof(__le64)]; - } xts_tweak; + u8 padding[FS_IV_SIZE - sizeof(__le64)]; + } iv; struct skcipher_request *req = NULL; DECLARE_FS_COMPLETION_RESULT(ecr); struct scatterlist dst, src; @@ -162,6 +157,18 @@ static int do_page_crypto(struct inode *inode, struct crypto_skcipher *tfm = ci->ci_ctfm; int res = 0; + BUG_ON(len == 0); + + BUILD_BUG_ON(sizeof(iv) != FS_IV_SIZE); + BUILD_BUG_ON(AES_BLOCK_SIZE != FS_IV_SIZE); + iv.index = cpu_to_le64(lblk_num); + memset(iv.padding, 0, sizeof(iv.padding)); + + if (ci->ci_essiv_tfm != NULL) { + crypto_cipher_encrypt_one(ci->ci_essiv_tfm, (u8 *)&iv, + (u8 *)&iv); + } + req = skcipher_request_alloc(tfm, gfp_flags); if (!req) { printk_ratelimited(KERN_ERR @@ -174,15 +181,11 @@ static int do_page_crypto(struct inode *inode, req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, page_crypt_complete, &ecr); - BUILD_BUG_ON(sizeof(xts_tweak) != FS_XTS_TWEAK_SIZE); - xts_tweak.index = cpu_to_le64(index); - memset(xts_tweak.padding, 0, sizeof(xts_tweak.padding)); - sg_init_table(&dst, 1); - sg_set_page(&dst, dest_page, PAGE_SIZE, 0); + sg_set_page(&dst, dest_page, len, offs); sg_init_table(&src, 1); - sg_set_page(&src, src_page, PAGE_SIZE, 0); - skcipher_request_set_crypt(req, &src, &dst, PAGE_SIZE, &xts_tweak); + sg_set_page(&src, src_page, len, offs); + skcipher_request_set_crypt(req, &src, &dst, len, &iv); if (rw == FS_DECRYPT) res = crypto_skcipher_decrypt(req); else @@ -202,53 +205,86 @@ static int do_page_crypto(struct inode *inode, return 0; } -static struct page *alloc_bounce_page(struct fscrypt_ctx *ctx, gfp_t gfp_flags) +struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx, + gfp_t gfp_flags) { ctx->w.bounce_page = mempool_alloc(fscrypt_bounce_page_pool, gfp_flags); if (ctx->w.bounce_page == NULL) return ERR_PTR(-ENOMEM); - ctx->flags |= FS_WRITE_PATH_FL; + ctx->flags |= FS_CTX_HAS_BOUNCE_BUFFER_FL; return ctx->w.bounce_page; } /** * fscypt_encrypt_page() - Encrypts a page - * @inode: The inode for which the encryption should take place - * @plaintext_page: The page to encrypt. Must be locked. - * @gfp_flags: The gfp flag for memory allocation + * @inode: The inode for which the encryption should take place + * @page: The page to encrypt. Must be locked for bounce-page + * encryption. + * @len: Length of data to encrypt in @page and encrypted + * data in returned page. + * @offs: Offset of data within @page and returned + * page holding encrypted data. + * @lblk_num: Logical block number. This must be unique for multiple + * calls with same inode, except when overwriting + * previously written data. + * @gfp_flags: The gfp flag for memory allocation * - * Allocates a ciphertext page and encrypts plaintext_page into it using the ctx - * encryption context. + * Encrypts @page using the ctx encryption context. Performs encryption + * either in-place or into a newly allocated bounce page. + * Called on the page write path. * - * Called on the page write path. The caller must call + * Bounce page allocation is the default. + * In this case, the contents of @page are encrypted and stored in an + * allocated bounce page. @page has to be locked and the caller must call * fscrypt_restore_control_page() on the returned ciphertext page to * release the bounce buffer and the encryption context. * - * Return: An allocated page with the encrypted content on success. Else, an + * In-place encryption is used by setting the FS_CFLG_OWN_PAGES flag in + * fscrypt_operations. Here, the input-page is returned with its content + * encrypted. + * + * Return: A page with the encrypted content on success. Else, an * error value or NULL. */ -struct page *fscrypt_encrypt_page(struct inode *inode, - struct page *plaintext_page, gfp_t gfp_flags) +struct page *fscrypt_encrypt_page(const struct inode *inode, + struct page *page, + unsigned int len, + unsigned int offs, + u64 lblk_num, gfp_t gfp_flags) + { struct fscrypt_ctx *ctx; - struct page *ciphertext_page = NULL; + struct page *ciphertext_page = page; int err; - BUG_ON(!PageLocked(plaintext_page)); + BUG_ON(len % FS_CRYPTO_BLOCK_SIZE != 0); + + if (inode->i_sb->s_cop->flags & FS_CFLG_OWN_PAGES) { + /* with inplace-encryption we just encrypt the page */ + err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk_num, page, + ciphertext_page, len, offs, + gfp_flags); + if (err) + return ERR_PTR(err); + + return ciphertext_page; + } + + BUG_ON(!PageLocked(page)); ctx = fscrypt_get_ctx(inode, gfp_flags); if (IS_ERR(ctx)) return (struct page *)ctx; /* The encryption operation will require a bounce page. */ - ciphertext_page = alloc_bounce_page(ctx, gfp_flags); + ciphertext_page = fscrypt_alloc_bounce_page(ctx, gfp_flags); if (IS_ERR(ciphertext_page)) goto errout; - ctx->w.control_page = plaintext_page; - err = do_page_crypto(inode, FS_ENCRYPT, plaintext_page->index, - plaintext_page, ciphertext_page, - gfp_flags); + ctx->w.control_page = page; + err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk_num, + page, ciphertext_page, len, offs, + gfp_flags); if (err) { ciphertext_page = ERR_PTR(err); goto errout; @@ -265,8 +301,13 @@ errout: EXPORT_SYMBOL(fscrypt_encrypt_page); /** - * f2crypt_decrypt_page() - Decrypts a page in-place - * @page: The page to decrypt. Must be locked. + * fscrypt_decrypt_page() - Decrypts a page in-place + * @inode: The corresponding inode for the page to decrypt. + * @page: The page to decrypt. Must be locked in case + * it is a writeback page (FS_CFLG_OWN_PAGES unset). + * @len: Number of bytes in @page to be decrypted. + * @offs: Start of data in @page. + * @lblk_num: Logical block number. * * Decrypts page in-place using the ctx encryption context. * @@ -274,76 +315,17 @@ EXPORT_SYMBOL(fscrypt_encrypt_page); * * Return: Zero on success, non-zero otherwise. */ -int fscrypt_decrypt_page(struct page *page) +int fscrypt_decrypt_page(const struct inode *inode, struct page *page, + unsigned int len, unsigned int offs, u64 lblk_num) { - BUG_ON(!PageLocked(page)); + if (!(inode->i_sb->s_cop->flags & FS_CFLG_OWN_PAGES)) + BUG_ON(!PageLocked(page)); - return do_page_crypto(page->mapping->host, - FS_DECRYPT, page->index, page, page, GFP_NOFS); + return fscrypt_do_page_crypto(inode, FS_DECRYPT, lblk_num, page, page, + len, offs, GFP_NOFS); } EXPORT_SYMBOL(fscrypt_decrypt_page); -int fscrypt_zeroout_range(struct inode *inode, pgoff_t lblk, - sector_t pblk, unsigned int len) -{ - struct fscrypt_ctx *ctx; - struct page *ciphertext_page = NULL; - struct bio *bio; - int ret, err = 0; - - BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE); - - ctx = fscrypt_get_ctx(inode, GFP_NOFS); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - ciphertext_page = alloc_bounce_page(ctx, GFP_NOWAIT); - if (IS_ERR(ciphertext_page)) { - err = PTR_ERR(ciphertext_page); - goto errout; - } - - while (len--) { - err = do_page_crypto(inode, FS_ENCRYPT, lblk, - ZERO_PAGE(0), ciphertext_page, - GFP_NOFS); - if (err) - goto errout; - - bio = bio_alloc(GFP_NOWAIT, 1); - if (!bio) { - err = -ENOMEM; - goto errout; - } - bio->bi_bdev = inode->i_sb->s_bdev; - bio->bi_iter.bi_sector = - pblk << (inode->i_sb->s_blocksize_bits - 9); - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - ret = bio_add_page(bio, ciphertext_page, - inode->i_sb->s_blocksize, 0); - if (ret != inode->i_sb->s_blocksize) { - /* should never happen! */ - WARN_ON(1); - bio_put(bio); - err = -EIO; - goto errout; - } - err = submit_bio_wait(bio); - if ((err == 0) && bio->bi_error) - err = -EIO; - bio_put(bio); - if (err) - goto errout; - lblk++; - pblk++; - } - err = 0; -errout: - fscrypt_release_ctx(ctx); - return err; -} -EXPORT_SYMBOL(fscrypt_zeroout_range); - /* * Validate dentries for encrypted directories to make sure we aren't * potentially caching stale data after a key has been added or @@ -392,63 +374,6 @@ const struct dentry_operations fscrypt_d_ops = { }; EXPORT_SYMBOL(fscrypt_d_ops); -/* - * Call fscrypt_decrypt_page on every single page, reusing the encryption - * context. - */ -static void completion_pages(struct work_struct *work) -{ - struct fscrypt_ctx *ctx = - container_of(work, struct fscrypt_ctx, r.work); - struct bio *bio = ctx->r.bio; - struct bio_vec *bv; - int i; - - bio_for_each_segment_all(bv, bio, i) { - struct page *page = bv->bv_page; - int ret = fscrypt_decrypt_page(page); - - if (ret) { - WARN_ON_ONCE(1); - SetPageError(page); - } else { - SetPageUptodate(page); - } - unlock_page(page); - } - fscrypt_release_ctx(ctx); - bio_put(bio); -} - -void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx, struct bio *bio) -{ - INIT_WORK(&ctx->r.work, completion_pages); - ctx->r.bio = bio; - queue_work(fscrypt_read_workqueue, &ctx->r.work); -} -EXPORT_SYMBOL(fscrypt_decrypt_bio_pages); - -void fscrypt_pullback_bio_page(struct page **page, bool restore) -{ - struct fscrypt_ctx *ctx; - struct page *bounce_page; - - /* The bounce data pages are unmapped. */ - if ((*page)->mapping) - return; - - /* The bounce data page is unmapped. */ - bounce_page = *page; - ctx = (struct fscrypt_ctx *)page_private(bounce_page); - - /* restore control page */ - *page = ctx->w.control_page; - - if (restore) - fscrypt_restore_control_page(bounce_page); -} -EXPORT_SYMBOL(fscrypt_pullback_bio_page); - void fscrypt_restore_control_page(struct page *page) { struct fscrypt_ctx *ctx; @@ -474,17 +399,22 @@ static void fscrypt_destroy(void) /** * fscrypt_initialize() - allocate major buffers for fs encryption. + * @cop_flags: fscrypt operations flags * * We only call this when we start accessing encrypted files, since it * results in memory getting allocated that wouldn't otherwise be used. * * Return: Zero on success, non-zero otherwise. */ -int fscrypt_initialize(void) +int fscrypt_initialize(unsigned int cop_flags) { int i, res = -ENOMEM; - if (fscrypt_bounce_page_pool) + /* + * No need to allocate a bounce page pool if there already is one or + * this FS won't use it. + */ + if (cop_flags & FS_CFLG_OWN_PAGES || fscrypt_bounce_page_pool) return 0; mutex_lock(&fscrypt_init_mutex); @@ -513,7 +443,6 @@ fail: mutex_unlock(&fscrypt_init_mutex); return res; } -EXPORT_SYMBOL(fscrypt_initialize); /** * fscrypt_init() - Set up for fs encryption. @@ -555,6 +484,8 @@ static void __exit fscrypt_exit(void) destroy_workqueue(fscrypt_read_workqueue); kmem_cache_destroy(fscrypt_ctx_cachep); kmem_cache_destroy(fscrypt_info_cachep); + + fscrypt_essiv_cleanup(); } module_exit(fscrypt_exit); diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index d1bbdc9dda76..ad9f814fdead 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -12,7 +12,7 @@ #include #include -#include +#include "fscrypt_private.h" /** * fname_crypt_complete() - completion callback for filename crypto @@ -159,6 +159,8 @@ static int fname_decrypt(struct inode *inode, static const char *lookup_table = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,"; +#define BASE64_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3) + /** * digest_encode() - * @@ -209,7 +211,7 @@ static int digest_decode(const char *src, int len, char *dst) return cp - dst; } -u32 fscrypt_fname_encrypted_size(struct inode *inode, u32 ilen) +u32 fscrypt_fname_encrypted_size(const struct inode *inode, u32 ilen) { int padding = 32; struct fscrypt_info *ci = inode->i_crypt_info; @@ -227,14 +229,17 @@ EXPORT_SYMBOL(fscrypt_fname_encrypted_size); * Allocates an output buffer that is sufficient for the crypto operation * specified by the context and the direction. */ -int fscrypt_fname_alloc_buffer(struct inode *inode, +int fscrypt_fname_alloc_buffer(const struct inode *inode, u32 ilen, struct fscrypt_str *crypto_str) { - unsigned int olen = fscrypt_fname_encrypted_size(inode, ilen); + u32 olen = fscrypt_fname_encrypted_size(inode, ilen); + const u32 max_encoded_len = + max_t(u32, BASE64_CHARS(FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE), + 1 + BASE64_CHARS(sizeof(struct fscrypt_digested_name))); crypto_str->len = olen; - if (olen < FS_FNAME_CRYPTO_DIGEST_SIZE * 2) - olen = FS_FNAME_CRYPTO_DIGEST_SIZE * 2; + olen = max(olen, max_encoded_len); + /* * Allocated buffer can hold one more character to null-terminate the * string @@ -266,6 +271,10 @@ EXPORT_SYMBOL(fscrypt_fname_free_buffer); * * The caller must have allocated sufficient memory for the @oname string. * + * If the key is available, we'll decrypt the disk name; otherwise, we'll encode + * it for presentation. Short names are directly base64-encoded, while long + * names are encoded in fscrypt_digested_name format. + * * Return: 0 on success, -errno on failure */ int fscrypt_fname_disk_to_usr(struct inode *inode, @@ -274,7 +283,7 @@ int fscrypt_fname_disk_to_usr(struct inode *inode, struct fscrypt_str *oname) { const struct qstr qname = FSTR_TO_QSTR(iname); - char buf[24]; + struct fscrypt_digested_name digested_name; if (fscrypt_is_dot_dotdot(&qname)) { oname->name[0] = '.'; @@ -289,20 +298,24 @@ int fscrypt_fname_disk_to_usr(struct inode *inode, if (inode->i_crypt_info) return fname_decrypt(inode, iname, oname); - if (iname->len <= FS_FNAME_CRYPTO_DIGEST_SIZE) { + if (iname->len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE) { oname->len = digest_encode(iname->name, iname->len, oname->name); return 0; } if (hash) { - memcpy(buf, &hash, 4); - memcpy(buf + 4, &minor_hash, 4); + digested_name.hash = hash; + digested_name.minor_hash = minor_hash; } else { - memset(buf, 0, 8); + digested_name.hash = 0; + digested_name.minor_hash = 0; } - memcpy(buf + 8, iname->name + ((iname->len - 17) & ~15), 16); + memcpy(digested_name.digest, + FSCRYPT_FNAME_DIGEST(iname->name, iname->len), + FSCRYPT_FNAME_DIGEST_SIZE); oname->name[0] = '_'; - oname->len = 1 + digest_encode(buf, 24, oname->name + 1); + oname->len = 1 + digest_encode((const char *)&digested_name, + sizeof(digested_name), oname->name + 1); return 0; } EXPORT_SYMBOL(fscrypt_fname_disk_to_usr); @@ -332,14 +345,39 @@ int fscrypt_fname_usr_to_disk(struct inode *inode, * in a directory. Consequently, a user space name cannot be mapped to * a disk-space name */ - return -EACCES; + return -ENOKEY; } EXPORT_SYMBOL(fscrypt_fname_usr_to_disk); +/** + * fscrypt_setup_filename() - prepare to search a possibly encrypted directory + * @dir: the directory that will be searched + * @iname: the user-provided filename being searched for + * @lookup: 1 if we're allowed to proceed without the key because it's + * ->lookup() or we're finding the dir_entry for deletion; 0 if we cannot + * proceed without the key because we're going to create the dir_entry. + * @fname: the filename information to be filled in + * + * Given a user-provided filename @iname, this function sets @fname->disk_name + * to the name that would be stored in the on-disk directory entry, if possible. + * If the directory is unencrypted this is simply @iname. Else, if we have the + * directory's encryption key, then @iname is the plaintext, so we encrypt it to + * get the disk_name. + * + * Else, for keyless @lookup operations, @iname is the presented ciphertext, so + * we decode it to get either the ciphertext disk_name (for short names) or the + * fscrypt_digested_name (for long names). Non-@lookup operations will be + * impossible in this case, so we fail them with ENOKEY. + * + * If successful, fscrypt_free_filename() must be called later to clean up. + * + * Return: 0 on success, -errno on failure + */ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, int lookup, struct fscrypt_name *fname) { - int ret = 0, bigname = 0; + int ret; + int digested; memset(fname, 0, sizeof(struct fscrypt_name)); fname->usr_fname = iname; @@ -367,31 +405,43 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, return 0; } if (!lookup) - return -EACCES; + return -ENOKEY; /* * We don't have the key and we are doing a lookup; decode the * user-supplied name */ - if (iname->name[0] == '_') - bigname = 1; - if ((bigname && (iname->len != 33)) || (!bigname && (iname->len > 43))) - return -ENOENT; + if (iname->name[0] == '_') { + if (iname->len != + 1 + BASE64_CHARS(sizeof(struct fscrypt_digested_name))) + return -ENOENT; + digested = 1; + } else { + if (iname->len > + BASE64_CHARS(FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE)) + return -ENOENT; + digested = 0; + } - fname->crypto_buf.name = kmalloc(32, GFP_KERNEL); + fname->crypto_buf.name = + kmalloc(max_t(size_t, FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE, + sizeof(struct fscrypt_digested_name)), + GFP_KERNEL); if (fname->crypto_buf.name == NULL) return -ENOMEM; - ret = digest_decode(iname->name + bigname, iname->len - bigname, + ret = digest_decode(iname->name + digested, iname->len - digested, fname->crypto_buf.name); if (ret < 0) { ret = -ENOENT; goto errout; } fname->crypto_buf.len = ret; - if (bigname) { - memcpy(&fname->hash, fname->crypto_buf.name, 4); - memcpy(&fname->minor_hash, fname->crypto_buf.name + 4, 4); + if (digested) { + const struct fscrypt_digested_name *n = + (const void *)fname->crypto_buf.name; + fname->hash = n->hash; + fname->minor_hash = n->minor_hash; } else { fname->disk_name.name = fname->crypto_buf.name; fname->disk_name.len = fname->crypto_buf.len; @@ -403,12 +453,3 @@ errout: return ret; } EXPORT_SYMBOL(fscrypt_setup_filename); - -void fscrypt_free_filename(struct fscrypt_name *fname) -{ - kfree(fname->crypto_buf.name); - fname->crypto_buf.name = NULL; - fname->usr_fname = NULL; - fname->disk_name.name = NULL; -} -EXPORT_SYMBOL(fscrypt_free_filename); diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h new file mode 100644 index 000000000000..a1d5021c31ef --- /dev/null +++ b/fs/crypto/fscrypt_private.h @@ -0,0 +1,97 @@ +/* + * fscrypt_private.h + * + * Copyright (C) 2015, Google, Inc. + * + * This contains encryption key functions. + * + * Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015. + */ + +#ifndef _FSCRYPT_PRIVATE_H +#define _FSCRYPT_PRIVATE_H + +#include +#include + +/* Encryption parameters */ +#define FS_IV_SIZE 16 +#define FS_AES_128_ECB_KEY_SIZE 16 +#define FS_AES_128_CBC_KEY_SIZE 16 +#define FS_AES_128_CTS_KEY_SIZE 16 +#define FS_AES_256_GCM_KEY_SIZE 32 +#define FS_AES_256_CBC_KEY_SIZE 32 +#define FS_AES_256_CTS_KEY_SIZE 32 +#define FS_AES_256_XTS_KEY_SIZE 64 + +#define FS_KEY_DERIVATION_NONCE_SIZE 16 + +/** + * Encryption context for inode + * + * Protector format: + * 1 byte: Protector format (1 = this version) + * 1 byte: File contents encryption mode + * 1 byte: File names encryption mode + * 1 byte: Flags + * 8 bytes: Master Key descriptor + * 16 bytes: Encryption Key derivation nonce + */ +struct fscrypt_context { + u8 format; + u8 contents_encryption_mode; + u8 filenames_encryption_mode; + u8 flags; + u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE]; + u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE]; +} __packed; + +#define FS_ENCRYPTION_CONTEXT_FORMAT_V1 1 + +/* + * A pointer to this structure is stored in the file system's in-core + * representation of an inode. + */ +struct fscrypt_info { + u8 ci_data_mode; + u8 ci_filename_mode; + u8 ci_flags; + struct crypto_skcipher *ci_ctfm; + struct crypto_cipher *ci_essiv_tfm; + u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE]; +}; + +typedef enum { + FS_DECRYPT = 0, + FS_ENCRYPT, +} fscrypt_direction_t; + +#define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 +#define FS_CTX_HAS_BOUNCE_BUFFER_FL 0x00000002 + +struct fscrypt_completion_result { + struct completion completion; + int res; +}; + +#define DECLARE_FS_COMPLETION_RESULT(ecr) \ + struct fscrypt_completion_result ecr = { \ + COMPLETION_INITIALIZER_ONSTACK((ecr).completion), 0 } + + +/* crypto.c */ +extern int fscrypt_initialize(unsigned int cop_flags); +extern struct workqueue_struct *fscrypt_read_workqueue; +extern int fscrypt_do_page_crypto(const struct inode *inode, + fscrypt_direction_t rw, u64 lblk_num, + struct page *src_page, + struct page *dest_page, + unsigned int len, unsigned int offs, + gfp_t gfp_flags); +extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx, + gfp_t gfp_flags); + +/* keyinfo.c */ +extern void __exit fscrypt_essiv_cleanup(void); + +#endif /* _FSCRYPT_PRIVATE_H */ diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index bb4606368eb1..8e704d12a1cf 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -10,7 +10,12 @@ #include #include -#include +#include +#include +#include +#include "fscrypt_private.h" + +static struct crypto_shash *essiv_hash_tfm; static void derive_crypt_complete(struct crypto_async_request *req, int rc) { @@ -27,13 +32,13 @@ static void derive_crypt_complete(struct crypto_async_request *req, int rc) * derive_key_aes() - Derive a key using AES-128-ECB * @deriving_key: Encryption key used for derivation. * @source_key: Source key to which to apply derivation. - * @derived_key: Derived key. + * @derived_raw_key: Derived raw key. * * Return: Zero on success; non-zero otherwise. */ static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE], - u8 source_key[FS_AES_256_XTS_KEY_SIZE], - u8 derived_key[FS_AES_256_XTS_KEY_SIZE]) + const struct fscrypt_key *source_key, + u8 derived_raw_key[FS_MAX_KEY_SIZE]) { int res = 0; struct skcipher_request *req = NULL; @@ -60,10 +65,10 @@ static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE], if (res < 0) goto out; - sg_init_one(&src_sg, source_key, FS_AES_256_XTS_KEY_SIZE); - sg_init_one(&dst_sg, derived_key, FS_AES_256_XTS_KEY_SIZE); - skcipher_request_set_crypt(req, &src_sg, &dst_sg, - FS_AES_256_XTS_KEY_SIZE, NULL); + sg_init_one(&src_sg, source_key->raw, source_key->size); + sg_init_one(&dst_sg, derived_raw_key, source_key->size); + skcipher_request_set_crypt(req, &src_sg, &dst_sg, source_key->size, + NULL); res = crypto_skcipher_encrypt(req); if (res == -EINPROGRESS || res == -EBUSY) { wait_for_completion(&ecr.completion); @@ -77,26 +82,22 @@ out: static int validate_user_key(struct fscrypt_info *crypt_info, struct fscrypt_context *ctx, u8 *raw_key, - u8 *prefix, int prefix_size) + const char *prefix, int min_keysize) { - u8 *full_key_descriptor; + char *description; struct key *keyring_key; struct fscrypt_key *master_key; const struct user_key_payload *ukp; - int full_key_len = prefix_size + (FS_KEY_DESCRIPTOR_SIZE * 2) + 1; int res; - full_key_descriptor = kmalloc(full_key_len, GFP_NOFS); - if (!full_key_descriptor) + description = kasprintf(GFP_NOFS, "%s%*phN", prefix, + FS_KEY_DESCRIPTOR_SIZE, + ctx->master_key_descriptor); + if (!description) return -ENOMEM; - memcpy(full_key_descriptor, prefix, prefix_size); - sprintf(full_key_descriptor + prefix_size, - "%*phN", FS_KEY_DESCRIPTOR_SIZE, - ctx->master_key_descriptor); - full_key_descriptor[full_key_len - 1] = '\0'; - keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL); - kfree(full_key_descriptor); + keyring_key = request_key(&key_type_logon, description, NULL); + kfree(description); if (IS_ERR(keyring_key)) return PTR_ERR(keyring_key); down_read(&keyring_key->sem); @@ -107,7 +108,12 @@ static int validate_user_key(struct fscrypt_info *crypt_info, res = -ENOKEY; goto out; } - ukp = user_key_payload(keyring_key); + ukp = user_key_payload_locked(keyring_key); + if (!ukp) { + /* key was revoked before we acquired its semaphore */ + res = -EKEYREVOKED; + goto out; + } if (ukp->datalen != sizeof(struct fscrypt_key)) { res = -EINVAL; goto out; @@ -115,50 +121,60 @@ static int validate_user_key(struct fscrypt_info *crypt_info, master_key = (struct fscrypt_key *)ukp->data; BUILD_BUG_ON(FS_AES_128_ECB_KEY_SIZE != FS_KEY_DERIVATION_NONCE_SIZE); - if (master_key->size != FS_AES_256_XTS_KEY_SIZE) { + if (master_key->size < min_keysize || master_key->size > FS_MAX_KEY_SIZE + || master_key->size % AES_BLOCK_SIZE != 0) { printk_once(KERN_WARNING "%s: key size incorrect: %d\n", __func__, master_key->size); res = -ENOKEY; goto out; } - res = derive_key_aes(ctx->nonce, master_key->raw, raw_key); + res = derive_key_aes(ctx->nonce, master_key, raw_key); out: up_read(&keyring_key->sem); key_put(keyring_key); return res; } +static const struct { + const char *cipher_str; + int keysize; +} available_modes[] = { + [FS_ENCRYPTION_MODE_AES_256_XTS] = { "xts(aes)", + FS_AES_256_XTS_KEY_SIZE }, + [FS_ENCRYPTION_MODE_AES_256_CTS] = { "cts(cbc(aes))", + FS_AES_256_CTS_KEY_SIZE }, + [FS_ENCRYPTION_MODE_AES_128_CBC] = { "cbc(aes)", + FS_AES_128_CBC_KEY_SIZE }, + [FS_ENCRYPTION_MODE_AES_128_CTS] = { "cts(cbc(aes))", + FS_AES_128_CTS_KEY_SIZE }, +}; + static int determine_cipher_type(struct fscrypt_info *ci, struct inode *inode, const char **cipher_str_ret, int *keysize_ret) { + u32 mode; + + if (!fscrypt_valid_enc_modes(ci->ci_data_mode, ci->ci_filename_mode)) { + pr_warn_ratelimited("fscrypt: inode %lu uses unsupported encryption modes (contents mode %d, filenames mode %d)\n", + inode->i_ino, + ci->ci_data_mode, ci->ci_filename_mode); + return -EINVAL; + } + if (S_ISREG(inode->i_mode)) { - if (ci->ci_data_mode == FS_ENCRYPTION_MODE_AES_256_XTS) { - *cipher_str_ret = "xts(aes)"; - *keysize_ret = FS_AES_256_XTS_KEY_SIZE; - return 0; - } - pr_warn_once("fscrypto: unsupported contents encryption mode " - "%d for inode %lu\n", - ci->ci_data_mode, inode->i_ino); - return -ENOKEY; + mode = ci->ci_data_mode; + } else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) { + mode = ci->ci_filename_mode; + } else { + WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %lu, which is not encryptable (file type %d)\n", + inode->i_ino, (inode->i_mode & S_IFMT)); + return -EINVAL; } - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) { - if (ci->ci_filename_mode == FS_ENCRYPTION_MODE_AES_256_CTS) { - *cipher_str_ret = "cts(cbc(aes))"; - *keysize_ret = FS_AES_256_CTS_KEY_SIZE; - return 0; - } - pr_warn_once("fscrypto: unsupported filenames encryption mode " - "%d for inode %lu\n", - ci->ci_filename_mode, inode->i_ino); - return -ENOKEY; - } - - pr_warn_once("fscrypto: unsupported file type %d for inode %lu\n", - (inode->i_mode & S_IFMT), inode->i_ino); - return -ENOKEY; + *cipher_str_ret = available_modes[mode].cipher_str; + *keysize_ret = available_modes[mode].keysize; + return 0; } static void put_crypt_info(struct fscrypt_info *ci) @@ -167,9 +183,76 @@ static void put_crypt_info(struct fscrypt_info *ci) return; crypto_free_skcipher(ci->ci_ctfm); + crypto_free_cipher(ci->ci_essiv_tfm); kmem_cache_free(fscrypt_info_cachep, ci); } +static int derive_essiv_salt(const u8 *key, int keysize, u8 *salt) +{ + struct crypto_shash *tfm = READ_ONCE(essiv_hash_tfm); + + /* init hash transform on demand */ + if (unlikely(!tfm)) { + struct crypto_shash *prev_tfm; + + tfm = crypto_alloc_shash("sha256", 0, 0); + if (IS_ERR(tfm)) { + pr_warn_ratelimited("fscrypt: error allocating SHA-256 transform: %ld\n", + PTR_ERR(tfm)); + return PTR_ERR(tfm); + } + prev_tfm = cmpxchg(&essiv_hash_tfm, NULL, tfm); + if (prev_tfm) { + crypto_free_shash(tfm); + tfm = prev_tfm; + } + } + + { + SHASH_DESC_ON_STACK(desc, tfm); + desc->tfm = tfm; + desc->flags = 0; + + return crypto_shash_digest(desc, key, keysize, salt); + } +} + +static int init_essiv_generator(struct fscrypt_info *ci, const u8 *raw_key, + int keysize) +{ + int err; + struct crypto_cipher *essiv_tfm; + u8 salt[SHA256_DIGEST_SIZE]; + + essiv_tfm = crypto_alloc_cipher("aes", 0, 0); + if (IS_ERR(essiv_tfm)) + return PTR_ERR(essiv_tfm); + + ci->ci_essiv_tfm = essiv_tfm; + + err = derive_essiv_salt(raw_key, keysize, salt); + if (err) + goto out; + + /* + * Using SHA256 to derive the salt/key will result in AES-256 being + * used for IV generation. File contents encryption will still use the + * configured keysize (AES-128) nevertheless. + */ + err = crypto_cipher_setkey(essiv_tfm, salt, sizeof(salt)); + if (err) + goto out; + +out: + memzero_explicit(salt, sizeof(salt)); + return err; +} + +void __exit fscrypt_essiv_cleanup(void) +{ + crypto_free_shash(essiv_hash_tfm); +} + int fscrypt_get_encryption_info(struct inode *inode) { struct fscrypt_info *crypt_info; @@ -183,21 +266,21 @@ int fscrypt_get_encryption_info(struct inode *inode) if (inode->i_crypt_info) return 0; - res = fscrypt_initialize(); + res = fscrypt_initialize(inode->i_sb->s_cop->flags); if (res) return res; - if (!inode->i_sb->s_cop->get_context) - return -EOPNOTSUPP; - res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx)); if (res < 0) { - if (!fscrypt_dummy_context_enabled(inode)) + if (!fscrypt_dummy_context_enabled(inode) || + inode->i_sb->s_cop->is_encrypted(inode)) return res; + /* Fake up a context for an unencrypted directory */ + memset(&ctx, 0, sizeof(ctx)); ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1; ctx.contents_encryption_mode = FS_ENCRYPTION_MODE_AES_256_XTS; ctx.filenames_encryption_mode = FS_ENCRYPTION_MODE_AES_256_CTS; - ctx.flags = 0; + memset(ctx.master_key_descriptor, 0x42, FS_KEY_DESCRIPTOR_SIZE); } else if (res != sizeof(ctx)) { return -EINVAL; } @@ -216,6 +299,7 @@ int fscrypt_get_encryption_info(struct inode *inode) crypt_info->ci_data_mode = ctx.contents_encryption_mode; crypt_info->ci_filename_mode = ctx.filenames_encryption_mode; crypt_info->ci_ctfm = NULL; + crypt_info->ci_essiv_tfm = NULL; memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor, sizeof(crypt_info->ci_master_key)); @@ -232,20 +316,12 @@ int fscrypt_get_encryption_info(struct inode *inode) if (!raw_key) goto out; - if (fscrypt_dummy_context_enabled(inode)) { - memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE); - goto got_key; - } - - res = validate_user_key(crypt_info, &ctx, raw_key, - FS_KEY_DESC_PREFIX, FS_KEY_DESC_PREFIX_SIZE); + res = validate_user_key(crypt_info, &ctx, raw_key, FS_KEY_DESC_PREFIX, + keysize); if (res && inode->i_sb->s_cop->key_prefix) { - u8 *prefix = NULL; - int prefix_size, res2; - - prefix_size = inode->i_sb->s_cop->key_prefix(inode, &prefix); - res2 = validate_user_key(crypt_info, &ctx, raw_key, - prefix, prefix_size); + int res2 = validate_user_key(crypt_info, &ctx, raw_key, + inode->i_sb->s_cop->key_prefix, + keysize); if (res2) { if (res2 == -ENOKEY) res = -ENOKEY; @@ -254,22 +330,33 @@ int fscrypt_get_encryption_info(struct inode *inode) } else if (res) { goto out; } -got_key: ctfm = crypto_alloc_skcipher(cipher_str, 0, 0); if (!ctfm || IS_ERR(ctfm)) { res = ctfm ? PTR_ERR(ctfm) : -ENOMEM; - printk(KERN_DEBUG - "%s: error %d (inode %u) allocating crypto tfm\n", - __func__, res, (unsigned) inode->i_ino); + pr_debug("%s: error %d (inode %lu) allocating crypto tfm\n", + __func__, res, inode->i_ino); goto out; } crypt_info->ci_ctfm = ctfm; crypto_skcipher_clear_flags(ctfm, ~0); crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_REQ_WEAK_KEY); + /* + * if the provided key is longer than keysize, we use the first + * keysize bytes of the derived key only + */ res = crypto_skcipher_setkey(ctfm, raw_key, keysize); if (res) goto out; + if (S_ISREG(inode->i_mode) && + crypt_info->ci_data_mode == FS_ENCRYPTION_MODE_AES_128_CBC) { + res = init_essiv_generator(crypt_info, raw_key, keysize); + if (res) { + pr_debug("%s: error %d (inode %lu) allocating essiv tfm\n", + __func__, res, inode->i_ino); + goto out; + } + } if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) == NULL) crypt_info = NULL; out: diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index bb4e209bd809..9914d51dff86 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -10,76 +10,37 @@ #include #include -#include #include - -static int inode_has_encryption_context(struct inode *inode) -{ - if (!inode->i_sb->s_cop->get_context) - return 0; - return (inode->i_sb->s_cop->get_context(inode, NULL, 0L) > 0); -} +#include "fscrypt_private.h" /* - * check whether the policy is consistent with the encryption context - * for the inode + * check whether an encryption policy is consistent with an encryption context */ -static int is_encryption_context_consistent_with_policy(struct inode *inode, +static bool is_encryption_context_consistent_with_policy( + const struct fscrypt_context *ctx, const struct fscrypt_policy *policy) { - struct fscrypt_context ctx; - int res; - - if (!inode->i_sb->s_cop->get_context) - return 0; - - res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx)); - if (res != sizeof(ctx)) - return 0; - - return (memcmp(ctx.master_key_descriptor, policy->master_key_descriptor, - FS_KEY_DESCRIPTOR_SIZE) == 0 && - (ctx.flags == policy->flags) && - (ctx.contents_encryption_mode == - policy->contents_encryption_mode) && - (ctx.filenames_encryption_mode == - policy->filenames_encryption_mode)); + return memcmp(ctx->master_key_descriptor, policy->master_key_descriptor, + FS_KEY_DESCRIPTOR_SIZE) == 0 && + (ctx->flags == policy->flags) && + (ctx->contents_encryption_mode == + policy->contents_encryption_mode) && + (ctx->filenames_encryption_mode == + policy->filenames_encryption_mode); } static int create_encryption_context_from_policy(struct inode *inode, const struct fscrypt_policy *policy) { struct fscrypt_context ctx; - int res; - - if (!inode->i_sb->s_cop->set_context) - return -EOPNOTSUPP; - - if (inode->i_sb->s_cop->prepare_context) { - res = inode->i_sb->s_cop->prepare_context(inode); - if (res) - return res; - } ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1; memcpy(ctx.master_key_descriptor, policy->master_key_descriptor, FS_KEY_DESCRIPTOR_SIZE); - if (!fscrypt_valid_contents_enc_mode( - policy->contents_encryption_mode)) { - printk(KERN_WARNING - "%s: Invalid contents encryption mode %d\n", __func__, - policy->contents_encryption_mode); + if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode, + policy->filenames_encryption_mode)) return -EINVAL; - } - - if (!fscrypt_valid_filenames_enc_mode( - policy->filenames_encryption_mode)) { - printk(KERN_WARNING - "%s: Invalid filenames encryption mode %d\n", __func__, - policy->filenames_encryption_mode); - return -EINVAL; - } if (policy->flags & ~FS_POLICY_FLAGS_VALID) return -EINVAL; @@ -93,16 +54,20 @@ static int create_encryption_context_from_policy(struct inode *inode, return inode->i_sb->s_cop->set_context(inode, &ctx, sizeof(ctx), NULL); } -int fscrypt_process_policy(struct file *filp, - const struct fscrypt_policy *policy) +int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg) { + struct fscrypt_policy policy; struct inode *inode = file_inode(filp); int ret; + struct fscrypt_context ctx; + + if (copy_from_user(&policy, arg, sizeof(policy))) + return -EFAULT; if (!inode_owner_or_capable(inode)) return -EACCES; - if (policy->version != 0) + if (policy.version != 0) return -EINVAL; ret = mnt_want_write_file(filp); @@ -111,22 +76,23 @@ int fscrypt_process_policy(struct file *filp, inode_lock(inode); - if (!inode_has_encryption_context(inode)) { + ret = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx)); + if (ret == -ENODATA) { if (!S_ISDIR(inode->i_mode)) - ret = -EINVAL; - else if (!inode->i_sb->s_cop->empty_dir) - ret = -EOPNOTSUPP; + ret = -ENOTDIR; else if (!inode->i_sb->s_cop->empty_dir(inode)) ret = -ENOTEMPTY; else ret = create_encryption_context_from_policy(inode, - policy); - } else if (!is_encryption_context_consistent_with_policy(inode, - policy)) { - printk(KERN_WARNING - "%s: Policy inconsistent with encryption context\n", - __func__); - ret = -EINVAL; + &policy); + } else if (ret == sizeof(ctx) && + is_encryption_context_consistent_with_policy(&ctx, + &policy)) { + /* The file already uses the same encryption policy. */ + ret = 0; + } else if (ret >= 0 || ret == -ERANGE) { + /* The file already uses a different encryption policy. */ + ret = -EEXIST; } inode_unlock(inode); @@ -134,32 +100,38 @@ int fscrypt_process_policy(struct file *filp, mnt_drop_write_file(filp); return ret; } -EXPORT_SYMBOL(fscrypt_process_policy); +EXPORT_SYMBOL(fscrypt_ioctl_set_policy); -int fscrypt_get_policy(struct inode *inode, struct fscrypt_policy *policy) +int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg) { + struct inode *inode = file_inode(filp); struct fscrypt_context ctx; + struct fscrypt_policy policy; int res; - if (!inode->i_sb->s_cop->get_context || - !inode->i_sb->s_cop->is_encrypted(inode)) + if (!inode->i_sb->s_cop->is_encrypted(inode)) return -ENODATA; res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx)); + if (res < 0 && res != -ERANGE) + return res; if (res != sizeof(ctx)) - return -ENODATA; + return -EINVAL; if (ctx.format != FS_ENCRYPTION_CONTEXT_FORMAT_V1) return -EINVAL; - policy->version = 0; - policy->contents_encryption_mode = ctx.contents_encryption_mode; - policy->filenames_encryption_mode = ctx.filenames_encryption_mode; - policy->flags = ctx.flags; - memcpy(&policy->master_key_descriptor, ctx.master_key_descriptor, + policy.version = 0; + policy.contents_encryption_mode = ctx.contents_encryption_mode; + policy.filenames_encryption_mode = ctx.filenames_encryption_mode; + policy.flags = ctx.flags; + memcpy(policy.master_key_descriptor, ctx.master_key_descriptor, FS_KEY_DESCRIPTOR_SIZE); + + if (copy_to_user(arg, &policy, sizeof(policy))) + return -EFAULT; return 0; } -EXPORT_SYMBOL(fscrypt_get_policy); +EXPORT_SYMBOL(fscrypt_ioctl_get_policy); /** * fscrypt_has_permitted_context() - is a file's encryption policy permitted @@ -258,9 +230,9 @@ EXPORT_SYMBOL(fscrypt_has_permitted_context); * @parent: Parent inode from which the context is inherited. * @child: Child inode that inherits the context from @parent. * @fs_data: private data given by FS. - * @preload: preload child i_crypt_info + * @preload: preload child i_crypt_info if true * - * Return: Zero on success, non-zero otherwise + * Return: 0 on success, -errno on failure */ int fscrypt_inherit_context(struct inode *parent, struct inode *child, void *fs_data, bool preload) @@ -269,9 +241,6 @@ int fscrypt_inherit_context(struct inode *parent, struct inode *child, struct fscrypt_info *ci; int res; - if (!parent->i_sb->s_cop->set_context) - return -EOPNOTSUPP; - res = fscrypt_get_encryption_info(parent); if (res < 0) return res; @@ -281,19 +250,11 @@ int fscrypt_inherit_context(struct inode *parent, struct inode *child, return -ENOKEY; ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1; - if (fscrypt_dummy_context_enabled(parent)) { - ctx.contents_encryption_mode = FS_ENCRYPTION_MODE_AES_256_XTS; - ctx.filenames_encryption_mode = FS_ENCRYPTION_MODE_AES_256_CTS; - ctx.flags = 0; - memset(ctx.master_key_descriptor, 0x42, FS_KEY_DESCRIPTOR_SIZE); - res = 0; - } else { - ctx.contents_encryption_mode = ci->ci_data_mode; - ctx.filenames_encryption_mode = ci->ci_filename_mode; - ctx.flags = ci->ci_flags; - memcpy(ctx.master_key_descriptor, ci->ci_master_key, - FS_KEY_DESCRIPTOR_SIZE); - } + ctx.contents_encryption_mode = ci->ci_data_mode; + ctx.filenames_encryption_mode = ci->ci_filename_mode; + ctx.flags = ci->ci_flags; + memcpy(ctx.master_key_descriptor, ci->ci_master_key, + FS_KEY_DESCRIPTOR_SIZE); get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE); res = parent->i_sb->s_cop->set_context(child, &ctx, sizeof(ctx), fs_data); diff --git a/fs/direct-io.c b/fs/direct-io.c index c60756e89833..c6220a2daefd 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -835,7 +835,8 @@ out: */ if (sdio->boundary) { ret = dio_send_cur_page(dio, sdio, map_bh); - dio_bio_submit(dio, sdio); + if (sdio->bio) + dio_bio_submit(dio, sdio); put_page(sdio->cur_page); sdio->cur_page = NULL; } diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 599a29237cfe..95c1c8d34539 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -117,7 +117,7 @@ ecryptfs_get_key_payload_data(struct key *key) auth_tok = ecryptfs_get_encrypted_key_payload_data(key); if (!auth_tok) - return (struct ecryptfs_auth_tok *)user_key_payload(key)->data; + return (struct ecryptfs_auth_tok *)user_key_payload_locked(key)->data; else return auth_tok; } diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index dfa519979038..dfd01ca1a60a 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -192,13 +192,6 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type, switch (type) { case ACL_TYPE_ACCESS: name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; - if (acl) { - error = posix_acl_update_mode(inode, &inode->i_mode, &acl); - if (error) - return error; - inode->i_ctime = ext4_current_time(inode); - ext4_mark_inode_dirty(handle, inode); - } break; case ACL_TYPE_DEFAULT: @@ -231,6 +224,8 @@ ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type) { handle_t *handle; int error, retries = 0; + umode_t mode = inode->i_mode; + int update_mode = 0; retry: handle = ext4_journal_start(inode, EXT4_HT_XATTR, @@ -238,7 +233,20 @@ retry: if (IS_ERR(handle)) return PTR_ERR(handle); + if ((type == ACL_TYPE_ACCESS) && acl) { + error = posix_acl_update_mode(inode, &mode, &acl); + if (error) + goto out_stop; + update_mode = 1; + } + error = __ext4_set_acl(handle, inode, type, acl); + if (!error && update_mode) { + inode->i_mode = mode; + inode->i_ctime = ext4_current_time(inode); + ext4_mark_inode_dirty(handle, inode); + } +out_stop: ext4_journal_stop(handle); if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 20ee0e4a5829..be2d9ae0a749 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -32,7 +32,11 @@ #include #include #include -#include +#ifdef CONFIG_EXT4_FS_ENCRYPTION +#include +#else +#include +#endif #include #include #ifdef __KERNEL__ @@ -1342,11 +1346,6 @@ struct ext4_super_block { /* Number of quota types we support */ #define EXT4_MAXQUOTAS 3 -#ifdef CONFIG_EXT4_FS_ENCRYPTION -#define EXT4_KEY_DESC_PREFIX "ext4:" -#define EXT4_KEY_DESC_PREFIX_SIZE 5 -#endif - /* * fourth extended-fs super-block data in memory */ @@ -1516,12 +1515,6 @@ struct ext4_sb_info { /* Barrier between changing inodes' journal flags and writepages ops. */ struct percpu_rw_semaphore s_journal_flag_rwsem; - - /* Encryption support */ -#ifdef CONFIG_EXT4_FS_ENCRYPTION - u8 key_prefix[EXT4_KEY_DESC_PREFIX_SIZE]; - u8 key_prefix_size; -#endif }; static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) @@ -2277,11 +2270,6 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb, struct ext4_group_desc *gdp); ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); -static inline int ext4_sb_has_crypto(struct super_block *sb) -{ - return ext4_has_feature_encrypt(sb); -} - static inline bool ext4_encrypted_inode(struct inode *inode) { return ext4_test_inode_flag(inode, EXT4_INODE_ENCRYPT); @@ -2330,28 +2318,6 @@ static inline int ext4_fname_setup_filename(struct inode *dir, } static inline void ext4_fname_free_filename(struct ext4_filename *fname) { } -#define fscrypt_set_d_op(i) -#define fscrypt_get_ctx fscrypt_notsupp_get_ctx -#define fscrypt_release_ctx fscrypt_notsupp_release_ctx -#define fscrypt_encrypt_page fscrypt_notsupp_encrypt_page -#define fscrypt_decrypt_page fscrypt_notsupp_decrypt_page -#define fscrypt_decrypt_bio_pages fscrypt_notsupp_decrypt_bio_pages -#define fscrypt_pullback_bio_page fscrypt_notsupp_pullback_bio_page -#define fscrypt_restore_control_page fscrypt_notsupp_restore_control_page -#define fscrypt_zeroout_range fscrypt_notsupp_zeroout_range -#define fscrypt_process_policy fscrypt_notsupp_process_policy -#define fscrypt_get_policy fscrypt_notsupp_get_policy -#define fscrypt_has_permitted_context fscrypt_notsupp_has_permitted_context -#define fscrypt_inherit_context fscrypt_notsupp_inherit_context -#define fscrypt_get_encryption_info fscrypt_notsupp_get_encryption_info -#define fscrypt_put_encryption_info fscrypt_notsupp_put_encryption_info -#define fscrypt_setup_filename fscrypt_notsupp_setup_filename -#define fscrypt_free_filename fscrypt_notsupp_free_filename -#define fscrypt_fname_encrypted_size fscrypt_notsupp_fname_encrypted_size -#define fscrypt_fname_alloc_buffer fscrypt_notsupp_fname_alloc_buffer -#define fscrypt_fname_free_buffer fscrypt_notsupp_fname_free_buffer -#define fscrypt_fname_disk_to_usr fscrypt_notsupp_fname_disk_to_usr -#define fscrypt_fname_usr_to_disk fscrypt_notsupp_fname_usr_to_disk #endif /* dir.c */ @@ -2373,17 +2339,16 @@ extern int ext4_find_dest_de(struct inode *dir, struct inode *inode, void *buf, int buf_size, struct ext4_filename *fname, struct ext4_dir_entry_2 **dest_de); -int ext4_insert_dentry(struct inode *dir, - struct inode *inode, - struct ext4_dir_entry_2 *de, - int buf_size, - struct ext4_filename *fname); +void ext4_insert_dentry(struct inode *inode, + struct ext4_dir_entry_2 *de, + int buf_size, + struct ext4_filename *fname); static inline void ext4_update_dx_flag(struct inode *inode) { if (!ext4_has_feature_dir_index(inode->i_sb)) ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); } -static unsigned char ext4_filetype_table[] = { +static const unsigned char ext4_filetype_table[] = { DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK }; @@ -3071,7 +3036,7 @@ extern int ext4_handle_dirty_dirent_node(handle_t *handle, struct inode *inode, struct buffer_head *bh); #define S_SHIFT 12 -static unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = { +static const unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFREG >> S_SHIFT] = EXT4_FT_REG_FILE, [S_IFDIR >> S_SHIFT] = EXT4_FT_DIR, [S_IFCHR >> S_SHIFT] = EXT4_FT_CHRDEV, diff --git a/fs/ext4/file.c b/fs/ext4/file.c index d17d12ed6f73..510e66422f04 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -527,7 +527,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) inode_lock(inode); isize = i_size_read(inode); - if (offset >= isize) { + if (offset < 0 || offset >= isize) { inode_unlock(inode); return -ENXIO; } @@ -590,7 +590,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) inode_lock(inode); isize = i_size_read(inode); - if (offset >= isize) { + if (offset < 0 || offset >= isize) { inode_unlock(inode); return -ENXIO; } diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 170421edfdfe..2cec6059e886 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -771,7 +771,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, if (err) return ERR_PTR(err); if (!fscrypt_has_encryption_key(dir)) - return ERR_PTR(-EPERM); + return ERR_PTR(-ENOKEY); if (!handle) nblocks += EXT4_DATA_TRANS_BLOCKS(dir->i_sb); encrypt = 1; @@ -1093,6 +1093,17 @@ got: if (err) goto fail_drop; + /* + * Since the encryption xattr will always be unique, create it first so + * that it's less likely to end up in an external xattr block and + * prevent its deduplication. + */ + if (encrypt) { + err = fscrypt_inherit_context(dir, inode, handle, true); + if (err) + goto fail_free_drop; + } + err = ext4_init_acl(handle, inode, dir); if (err) goto fail_free_drop; @@ -1114,13 +1125,6 @@ got: ei->i_datasync_tid = handle->h_transaction->t_tid; } - if (encrypt) { - /* give pointer to avoid set_context with journal ops. */ - err = fscrypt_inherit_context(dir, inode, &encrypt, true); - if (err) - goto fail_free_drop; - } - err = ext4_mark_inode_dirty(handle, inode); if (err) { ext4_std_error(sb, err); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index a5807fd03b34..4d78b932c320 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1035,7 +1035,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle, err = ext4_journal_get_write_access(handle, iloc->bh); if (err) return err; - ext4_insert_dentry(dir, inode, de, inline_size, fname); + ext4_insert_dentry(inode, de, inline_size, fname); ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c08d49981b7a..d887e32e88a0 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1166,7 +1166,8 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, if (unlikely(err)) page_zero_new_buffers(page, from, to); else if (decrypt) - err = fscrypt_decrypt_page(page); + err = fscrypt_decrypt_page(page->mapping->host, page, + PAGE_SIZE, 0, page->index); return err; } #endif @@ -2120,15 +2121,29 @@ static int ext4_writepage(struct page *page, static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) { int len; - loff_t size = i_size_read(mpd->inode); + loff_t size; int err; BUG_ON(page->index != mpd->first_page); + clear_page_dirty_for_io(page); + /* + * We have to be very careful here! Nothing protects writeback path + * against i_size changes and the page can be writeably mapped into + * page tables. So an application can be growing i_size and writing + * data through mmap while writeback runs. clear_page_dirty_for_io() + * write-protects our page in page tables and the page cannot get + * written to again until we release page lock. So only after + * clear_page_dirty_for_io() we are safe to sample i_size for + * ext4_bio_write_page() to zero-out tail of the written page. We rely + * on the barrier provided by TestClearPageDirty in + * clear_page_dirty_for_io() to make sure i_size is really sampled only + * after page tables are updated. + */ + size = i_size_read(mpd->inode); if (page->index == size >> PAGE_SHIFT) len = size & ~PAGE_MASK; else len = PAGE_SIZE; - clear_page_dirty_for_io(page); err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false); if (!err) mpd->wbc->nr_to_write--; @@ -3810,7 +3825,8 @@ static int __ext4_block_zero_page_range(handle_t *handle, /* We expect the key to be set. */ BUG_ON(!fscrypt_has_encryption_key(inode)); BUG_ON(blocksize != PAGE_SIZE); - WARN_ON_ONCE(fscrypt_decrypt_page(page)); + WARN_ON_ONCE(fscrypt_decrypt_page(page->mapping->host, + page, PAGE_SIZE, 0, page->index)); } } if (ext4_should_journal_data(inode)) { diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index cec9280950b8..15ca15cb9f28 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -191,6 +191,7 @@ journal_err_out: return err; } +#ifdef CONFIG_EXT4_FS_ENCRYPTION static int uuid_is_zero(__u8 u[16]) { int i; @@ -200,6 +201,7 @@ static int uuid_is_zero(__u8 u[16]) return 0; return 1; } +#endif static int ext4_ioctl_setflags(struct inode *inode, unsigned int flags) @@ -770,28 +772,19 @@ resizefs_out: } case EXT4_IOC_PRECACHE_EXTENTS: return ext4_ext_precache(inode); - case EXT4_IOC_SET_ENCRYPTION_POLICY: { -#ifdef CONFIG_EXT4_FS_ENCRYPTION - struct fscrypt_policy policy; + case EXT4_IOC_SET_ENCRYPTION_POLICY: if (!ext4_has_feature_encrypt(sb)) return -EOPNOTSUPP; + return fscrypt_ioctl_set_policy(filp, (const void __user *)arg); - if (copy_from_user(&policy, - (struct fscrypt_policy __user *)arg, - sizeof(policy))) - return -EFAULT; - return fscrypt_process_policy(filp, &policy); -#else - return -EOPNOTSUPP; -#endif - } case EXT4_IOC_GET_ENCRYPTION_PWSALT: { +#ifdef CONFIG_EXT4_FS_ENCRYPTION int err, err2; struct ext4_sb_info *sbi = EXT4_SB(sb); handle_t *handle; - if (!ext4_sb_has_crypto(sb)) + if (!ext4_has_feature_encrypt(sb)) return -EOPNOTSUPP; if (uuid_is_zero(sbi->s_es->s_encrypt_pw_salt)) { err = mnt_want_write_file(filp); @@ -821,30 +814,18 @@ resizefs_out: sbi->s_es->s_encrypt_pw_salt, 16)) return -EFAULT; return 0; - } - case EXT4_IOC_GET_ENCRYPTION_POLICY: { -#ifdef CONFIG_EXT4_FS_ENCRYPTION - struct fscrypt_policy policy; - int err = 0; - - if (!ext4_encrypted_inode(inode)) - return -ENOENT; - err = fscrypt_get_policy(inode, &policy); - if (err) - return err; - if (copy_to_user((void __user *)arg, &policy, sizeof(policy))) - return -EFAULT; - return 0; #else return -EOPNOTSUPP; #endif } + case EXT4_IOC_GET_ENCRYPTION_POLICY: + return fscrypt_ioctl_get_policy(filp, (void __user *)arg); + case EXT4_IOC_FSGETXATTR: { struct fsxattr fa; memset(&fa, 0, sizeof(struct fsxattr)); - ext4_get_inode_flags(ei); fa.fsx_xflags = ext4_iflags_to_xflags(ei->i_flags & EXT4_FL_USER_VISIBLE); if (ext4_has_feature_project(inode->i_sb)) { diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 423a21cd077c..c30889b60848 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1237,37 +1237,24 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block) } /* - * NOTE! unlike strncmp, ext4_match returns 1 for success, 0 for failure. + * Test whether a directory entry matches the filename being searched for. * - * `len <= EXT4_NAME_LEN' is guaranteed by caller. - * `de != NULL' is guaranteed by caller. + * Return: %true if the directory entry matches, otherwise %false. */ -static inline int ext4_match(struct ext4_filename *fname, - struct ext4_dir_entry_2 *de) +static inline bool ext4_match(const struct ext4_filename *fname, + const struct ext4_dir_entry_2 *de) { - const void *name = fname_name(fname); - u32 len = fname_len(fname); + struct fscrypt_name f; if (!de->inode) - return 0; + return false; + f.usr_fname = fname->usr_fname; + f.disk_name = fname->disk_name; #ifdef CONFIG_EXT4_FS_ENCRYPTION - if (unlikely(!name)) { - if (fname->usr_fname->name[0] == '_') { - int ret; - if (de->name_len <= 32) - return 0; - ret = memcmp(de->name + ((de->name_len - 17) & ~15), - fname->crypto_buf.name + 8, 16); - return (ret == 0) ? 1 : 0; - } - name = fname->crypto_buf.name; - len = fname->crypto_buf.len; - } + f.crypto_buf = fname->crypto_buf; #endif - if (de->name_len != len) - return 0; - return (memcmp(de->name, name, len) == 0) ? 1 : 0; + return fscrypt_match_name(&f, de->name, de->name_len); } /* @@ -1281,48 +1268,31 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size, struct ext4_dir_entry_2 * de; char * dlimit; int de_len; - int res; de = (struct ext4_dir_entry_2 *)search_buf; dlimit = search_buf + buf_size; while ((char *) de < dlimit) { /* this code is executed quadratically often */ /* do minimal checking `by hand' */ - if ((char *) de + de->name_len <= dlimit) { - res = ext4_match(fname, de); - if (res < 0) { - res = -1; - goto return_result; - } - if (res > 0) { - /* found a match - just to be sure, do - * a full check */ - if (ext4_check_dir_entry(dir, NULL, de, bh, - bh->b_data, - bh->b_size, offset)) { - res = -1; - goto return_result; - } - *res_dir = de; - res = 1; - goto return_result; - } - + if ((char *) de + de->name_len <= dlimit && + ext4_match(fname, de)) { + /* found a match - just to be sure, do + * a full check */ + if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data, + bh->b_size, offset)) + return -1; + *res_dir = de; + return 1; } /* prevent looping on a bad block */ de_len = ext4_rec_len_from_disk(de->rec_len, dir->i_sb->s_blocksize); - if (de_len <= 0) { - res = -1; - goto return_result; - } + if (de_len <= 0) + return -1; offset += de_len; de = (struct ext4_dir_entry_2 *) ((char *) de + de_len); } - - res = 0; -return_result: - return res; + return 0; } static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block, @@ -1378,6 +1348,8 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, return NULL; retval = ext4_fname_setup_filename(dir, d_name, 1, &fname); + if (retval == -ENOENT) + return NULL; if (retval) return ERR_PTR(retval); @@ -1614,16 +1586,9 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi if (!IS_ERR(inode) && ext4_encrypted_inode(dir) && (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && !fscrypt_has_permitted_context(dir, inode)) { - int nokey = ext4_encrypted_inode(inode) && - !fscrypt_has_encryption_key(inode); - if (nokey) { - iput(inode); - return ERR_PTR(-ENOKEY); - } ext4_warning(inode->i_sb, "Inconsistent encryption contexts: %lu/%lu", - (unsigned long) dir->i_ino, - (unsigned long) inode->i_ino); + dir->i_ino, inode->i_ino); iput(inode); return ERR_PTR(-EPERM); } @@ -1831,24 +1796,15 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode, int nlen, rlen; unsigned int offset = 0; char *top; - int res; de = (struct ext4_dir_entry_2 *)buf; top = buf + buf_size - reclen; while ((char *) de <= top) { if (ext4_check_dir_entry(dir, NULL, de, bh, - buf, buf_size, offset)) { - res = -EFSCORRUPTED; - goto return_result; - } - /* Provide crypto context and crypto buffer to ext4 match */ - res = ext4_match(fname, de); - if (res < 0) - goto return_result; - if (res > 0) { - res = -EEXIST; - goto return_result; - } + buf, buf_size, offset)) + return -EFSCORRUPTED; + if (ext4_match(fname, de)) + return -EEXIST; nlen = EXT4_DIR_REC_LEN(de->name_len); rlen = ext4_rec_len_from_disk(de->rec_len, buf_size); if ((de->inode ? rlen - nlen : rlen) >= reclen) @@ -1856,22 +1812,17 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode, de = (struct ext4_dir_entry_2 *)((char *)de + rlen); offset += rlen; } - if ((char *) de > top) - res = -ENOSPC; - else { - *dest_de = de; - res = 0; - } -return_result: - return res; + return -ENOSPC; + + *dest_de = de; + return 0; } -int ext4_insert_dentry(struct inode *dir, - struct inode *inode, - struct ext4_dir_entry_2 *de, - int buf_size, - struct ext4_filename *fname) +void ext4_insert_dentry(struct inode *inode, + struct ext4_dir_entry_2 *de, + int buf_size, + struct ext4_filename *fname) { int nlen, rlen; @@ -1890,7 +1841,6 @@ int ext4_insert_dentry(struct inode *dir, ext4_set_de_type(inode->i_sb, de, inode->i_mode); de->name_len = fname_len(fname); memcpy(de->name, fname_name(fname), fname_len(fname)); - return 0; } /* @@ -1926,11 +1876,8 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname, return err; } - /* By now the buffer is marked for journaling. Due to crypto operations, - * the following function call may fail */ - err = ext4_insert_dentry(dir, inode, de, blocksize, fname); - if (err < 0) - return err; + /* By now the buffer is marked for journaling */ + ext4_insert_dentry(inode, de, blocksize, fname); /* * XXX shouldn't update any times until successful @@ -3090,7 +3037,7 @@ static int ext4_symlink(struct inode *dir, if (err) return err; if (!fscrypt_has_encryption_key(dir)) - return -EPERM; + return -ENOKEY; disk_link.len = (fscrypt_fname_encrypted_size(dir, len) + sizeof(struct fscrypt_symlink_data)); sd = kzalloc(disk_link.len, GFP_KERNEL); @@ -3527,6 +3474,12 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, EXT4_I(old_dentry->d_inode)->i_projid))) return -EXDEV; + if ((ext4_encrypted_inode(old_dir) && + !fscrypt_has_encryption_key(old_dir)) || + (ext4_encrypted_inode(new_dir) && + !fscrypt_has_encryption_key(new_dir))) + return -ENOKEY; + retval = dquot_initialize(old.dir); if (retval) return retval; @@ -3726,6 +3679,12 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, u8 new_file_type; int retval; + if ((ext4_encrypted_inode(old_dir) && + !fscrypt_has_encryption_key(old_dir)) || + (ext4_encrypted_inode(new_dir) && + !fscrypt_has_encryption_key(new_dir))) + return -ENOKEY; + if ((ext4_encrypted_inode(old_dir) || ext4_encrypted_inode(new_dir)) && (old_dir != new_dir) && diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 0094923e5ebf..0718a8667f0c 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -24,7 +24,6 @@ #include #include #include -#include #include "ext4_jbd2.h" #include "xattr.h" @@ -470,7 +469,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io, gfp_t gfp_flags = GFP_NOFS; retry_encrypt: - data_page = fscrypt_encrypt_page(inode, page, gfp_flags); + data_page = fscrypt_encrypt_page(inode, page, PAGE_SIZE, 0, + page->index, gfp_flags); if (IS_ERR(data_page)) { ret = PTR_ERR(data_page); if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) { diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5fa9ba1de429..01b39e7447d6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1102,51 +1102,65 @@ static int ext4_get_context(struct inode *inode, void *ctx, size_t len) EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len); } -static int ext4_key_prefix(struct inode *inode, u8 **key) -{ - *key = EXT4_SB(inode->i_sb)->key_prefix; - return EXT4_SB(inode->i_sb)->key_prefix_size; -} - -static int ext4_prepare_context(struct inode *inode) -{ - return ext4_convert_inline_data(inode); -} - static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, void *fs_data) { - handle_t *handle; - int res, res2; + handle_t *handle = fs_data; + int res, res2, retries = 0; - /* fs_data is null when internally used. */ - if (fs_data) { - res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION, - EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, - len, 0); + res = ext4_convert_inline_data(inode); + if (res) + return res; + + /* + * If a journal handle was specified, then the encryption context is + * being set on a new inode via inheritance and is part of a larger + * transaction to create the inode. Otherwise the encryption context is + * being set on an existing inode in its own transaction. Only in the + * latter case should the "retry on ENOSPC" logic be used. + */ + + if (handle) { + res = ext4_xattr_set_handle(handle, inode, + EXT4_XATTR_INDEX_ENCRYPTION, + EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, + ctx, len, 0); if (!res) { ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); + /* + * Update inode->i_flags - e.g. S_DAX may get disabled + */ + ext4_set_inode_flags(inode); } return res; } + res = dquot_initialize(inode); + if (res) + return res; +retry: handle = ext4_journal_start(inode, EXT4_HT_MISC, ext4_jbd2_credits_xattr(inode)); if (IS_ERR(handle)) return PTR_ERR(handle); - res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION, - EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, - len, 0); + res = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_ENCRYPTION, + EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, + ctx, len, 0); if (!res) { ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); + /* Update inode->i_flags - e.g. S_DAX may get disabled */ + ext4_set_inode_flags(inode); res = ext4_mark_inode_dirty(handle, inode); if (res) EXT4_ERROR_INODE(inode, "Failed to mark inode dirty"); } res2 = ext4_journal_stop(handle); + + if (res == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) + goto retry; if (!res) res = res2; return res; @@ -1163,10 +1177,9 @@ static unsigned ext4_max_namelen(struct inode *inode) EXT4_NAME_LEN; } -static struct fscrypt_operations ext4_cryptops = { +static const struct fscrypt_operations ext4_cryptops = { + .key_prefix = "ext4:", .get_context = ext4_get_context, - .key_prefix = ext4_key_prefix, - .prepare_context = ext4_prepare_context, .set_context = ext4_set_context, .dummy_context = ext4_dummy_context, .is_encrypted = ext4_encrypted_inode, @@ -1174,7 +1187,7 @@ static struct fscrypt_operations ext4_cryptops = { .max_namelen = ext4_max_namelen, }; #else -static struct fscrypt_operations ext4_cryptops = { +static const struct fscrypt_operations ext4_cryptops = { .is_encrypted = ext4_encrypted_inode, }; #endif @@ -2334,6 +2347,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, unsigned int s_flags = sb->s_flags; int nr_orphans = 0, nr_truncates = 0; #ifdef CONFIG_QUOTA + int quota_update = 0; int i; #endif if (!es->s_last_orphan) { @@ -2372,14 +2386,32 @@ static void ext4_orphan_cleanup(struct super_block *sb, #ifdef CONFIG_QUOTA /* Needed for iput() to work correctly and not trash data */ sb->s_flags |= MS_ACTIVE; - /* Turn on quotas so that they are updated correctly */ + + /* + * Turn on quotas which were not enabled for read-only mounts if + * filesystem has quota feature, so that they are updated correctly. + */ + if (ext4_has_feature_quota(sb) && (s_flags & MS_RDONLY)) { + int ret = ext4_enable_quotas(sb); + + if (!ret) + quota_update = 1; + else + ext4_msg(sb, KERN_ERR, + "Cannot turn on quotas: error %d", ret); + } + + /* Turn on journaled quotas used for old sytle */ for (i = 0; i < EXT4_MAXQUOTAS; i++) { if (EXT4_SB(sb)->s_qf_names[i]) { int ret = ext4_quota_on_mount(sb, i); - if (ret < 0) + + if (!ret) + quota_update = 1; + else ext4_msg(sb, KERN_ERR, "Cannot turn on journaled " - "quota: error %d", ret); + "quota: type %d: error %d", i, ret); } } #endif @@ -2438,10 +2470,12 @@ static void ext4_orphan_cleanup(struct super_block *sb, ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up", PLURAL(nr_truncates)); #ifdef CONFIG_QUOTA - /* Turn quotas off */ - for (i = 0; i < EXT4_MAXQUOTAS; i++) { - if (sb_dqopt(sb)->files[i]) - dquot_quota_off(sb, i); + /* Turn off quotas if they were enabled for orphan cleanup */ + if (quota_update) { + for (i = 0; i < EXT4_MAXQUOTAS; i++) { + if (sb_dqopt(sb)->files[i]) + dquot_quota_off(sb, i); + } } #endif sb->s_flags = s_flags; /* Restore MS_RDONLY status */ @@ -4199,11 +4233,6 @@ no_journal: ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10); kfree(orig_data); -#ifdef CONFIG_EXT4_FS_ENCRYPTION - memcpy(sbi->key_prefix, EXT4_KEY_DESC_PREFIX, - EXT4_KEY_DESC_PREFIX_SIZE); - sbi->key_prefix_size = EXT4_KEY_DESC_PREFIX_SIZE; -#endif return 0; cantfind_ext4: @@ -5365,6 +5394,9 @@ static int ext4_enable_quotas(struct super_block *sb) DQUOT_USAGE_ENABLED | (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0)); if (err) { + for (type--; type >= 0; type--) + dquot_quota_off(sb, type); + ext4_warning(sb, "Failed to enable quota tracking " "(type=%d, err=%d). Please run " diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index ca949ea7c02f..a0dc559b1b47 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_F2FS_FS) += f2fs.o f2fs-y := dir.o file.o inode.o namei.o hash.o super.o inline.o f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o -f2fs-y += shrinker.o extent_cache.o +f2fs-y += shrinker.o extent_cache.o sysfs.o f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 55aa29c0c78d..436b3a1464d9 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -207,15 +207,16 @@ static int __f2fs_set_acl(struct inode *inode, int type, void *value = NULL; size_t size = 0; int error; + umode_t mode = inode->i_mode; switch (type) { case ACL_TYPE_ACCESS: name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl && !ipage) { - error = posix_acl_update_mode(inode, &inode->i_mode, &acl); + error = posix_acl_update_mode(inode, &mode, &acl); if (error) return error; - set_acl_inode(inode, inode->i_mode); + set_acl_inode(inode, mode); } break; @@ -233,7 +234,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, value = f2fs_acl_to_disk(F2FS_I_SB(inode), acl, &size); if (IS_ERR(value)) { clear_inode_flag(inode, FI_ACL_MODE); - return (int)PTR_ERR(value); + return PTR_ERR(value); } } @@ -384,7 +385,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, if (error) return error; - f2fs_mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode, true); if (default_acl) { error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl, diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index b4dbc2f59656..04fe1df052b2 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -31,7 +31,7 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io) set_ckpt_flags(sbi, CP_ERROR_FLAG); sbi->sb->s_flags |= MS_RDONLY; if (!end_io) - f2fs_flush_merged_bios(sbi); + f2fs_flush_merged_writes(sbi); } /* @@ -65,7 +65,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, .sbi = sbi, .type = META, .op = REQ_OP_READ, - .op_flags = READ_SYNC | REQ_META | REQ_PRIO, + .op_flags = REQ_META | REQ_PRIO, .old_blkaddr = index, .new_blkaddr = index, .encrypted_page = NULL, @@ -160,8 +160,9 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, .sbi = sbi, .type = META, .op = REQ_OP_READ, - .op_flags = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : REQ_RAHEAD, + .op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD, .encrypted_page = NULL, + .in_list = false, }; struct blk_plug plug; @@ -207,12 +208,10 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, } fio.page = page; - fio.old_blkaddr = fio.new_blkaddr; - f2fs_submit_page_mbio(&fio); + f2fs_submit_page_bio(&fio); f2fs_put_page(page, 0); } out: - f2fs_submit_merged_bio(sbi, META, READ); blk_finish_plug(&plug); return blkno - start; } @@ -228,11 +227,12 @@ void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index) f2fs_put_page(page, 0); if (readahead) - ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR, true); + ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true); } -static int f2fs_write_meta_page(struct page *page, - struct writeback_control *wbc) +static int __f2fs_write_meta_page(struct page *page, + struct writeback_control *wbc, + enum iostat_type io_type) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); @@ -245,16 +245,17 @@ static int f2fs_write_meta_page(struct page *page, if (unlikely(f2fs_cp_error(sbi))) goto redirty_out; - write_meta_page(sbi, page); + write_meta_page(sbi, page, io_type); dec_page_count(sbi, F2FS_DIRTY_META); if (wbc->for_reclaim) - f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, META, WRITE); + f2fs_submit_merged_write_cond(sbi, page->mapping->host, + 0, page->index, META); unlock_page(page); if (unlikely(f2fs_cp_error(sbi))) - f2fs_submit_merged_bio(sbi, META, WRITE); + f2fs_submit_merged_write(sbi, META); return 0; @@ -263,23 +264,33 @@ redirty_out: return AOP_WRITEPAGE_ACTIVATE; } +static int f2fs_write_meta_page(struct page *page, + struct writeback_control *wbc) +{ + return __f2fs_write_meta_page(page, wbc, FS_META_IO); +} + static int f2fs_write_meta_pages(struct address_space *mapping, struct writeback_control *wbc) { struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); long diff, written; + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + goto skip_write; + /* collect a number of dirty meta pages and write together */ if (wbc->for_kupdate || get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META)) goto skip_write; - trace_f2fs_writepages(mapping->host, wbc, META); + /* if locked failed, cp will flush dirty pages instead */ + if (!mutex_trylock(&sbi->cp_mutex)) + goto skip_write; - /* if mounting is failed, skip writing node pages */ - mutex_lock(&sbi->cp_mutex); + trace_f2fs_writepages(mapping->host, wbc, META); diff = nr_pages_to_write(sbi, META, wbc); - written = sync_meta_pages(sbi, META, wbc->nr_to_write); + written = sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO); mutex_unlock(&sbi->cp_mutex); wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff); return 0; @@ -291,7 +302,7 @@ skip_write: } long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, - long nr_to_write) + long nr_to_write, enum iostat_type io_type) { struct address_space *mapping = META_MAPPING(sbi); pgoff_t index = 0, end = ULONG_MAX, prev = ULONG_MAX; @@ -342,7 +353,7 @@ continue_unlock: if (!clear_page_dirty_for_io(page)) goto continue_unlock; - if (mapping->a_ops->writepage(page, &wbc)) { + if (__f2fs_write_meta_page(page, &wbc, io_type)) { unlock_page(page); break; } @@ -356,7 +367,7 @@ continue_unlock: } stop: if (nwritten) - f2fs_submit_merged_bio(sbi, type, WRITE); + f2fs_submit_merged_write(sbi, type); blk_finish_plug(&plug); @@ -493,6 +504,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi) #ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_ORPHAN)) { spin_unlock(&im->ino_lock); + f2fs_show_injection_info(FAULT_ORPHAN); return -ENOSPC; } #endif @@ -565,7 +577,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) if (ni.blk_addr != NULL_ADDR) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_msg(sbi->sb, KERN_WARNING, - "%s: orphan failed (ino=%x), run fsck to fix.", + "%s: orphan failed (ino=%x) by kernel, retry mount.", __func__, ino); return -EIO; } @@ -576,11 +588,24 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) int recover_orphan_inodes(struct f2fs_sb_info *sbi) { block_t start_blk, orphan_blocks, i, j; - int err; + unsigned int s_flags = sbi->sb->s_flags; + int err = 0; if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG)) return 0; + if (s_flags & MS_RDONLY) { + f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); + sbi->sb->s_flags &= ~MS_RDONLY; + } + +#ifdef CONFIG_QUOTA + /* Needed for iput() to work correctly and not trash data */ + sbi->sb->s_flags |= MS_ACTIVE; + /* Turn on quotas so that they are updated correctly */ + f2fs_enable_quota_files(sbi); +#endif + start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi); @@ -596,14 +621,21 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) err = recover_orphan_inode(sbi, ino); if (err) { f2fs_put_page(page, 1); - return err; + goto out; } } f2fs_put_page(page, 1); } /* clear Orphan Flag */ clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG); - return 0; +out: +#ifdef CONFIG_QUOTA + /* Turn quotas off */ + f2fs_quota_off_umount(sbi->sb); +#endif + sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ + + return err; } static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) @@ -675,14 +707,13 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, *cp_block = (struct f2fs_checkpoint *)page_address(*cp_page); crc_offset = le32_to_cpu((*cp_block)->checksum_offset); - if (crc_offset >= blk_size) { + if (crc_offset > (blk_size - sizeof(__le32))) { f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc_offset: %zu", crc_offset); return -EINVAL; } - crc = le32_to_cpu(*((__le32 *)((unsigned char *)*cp_block - + crc_offset))); + crc = cur_cp_crc(*cp_block); if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) { f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value"); return -EINVAL; @@ -770,7 +801,7 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) /* Sanity checking of checkpoint */ if (sanity_check_ckpt(sbi)) - goto fail_no_cp; + goto free_fail_no_cp; if (cur_page == cp1) sbi->cur_cp_pack = 1; @@ -798,6 +829,9 @@ done: f2fs_put_page(cp2, 1); return 0; +free_fail_no_cp: + f2fs_put_page(cp1, 1); + f2fs_put_page(cp2, 1); fail_no_cp: kfree(sbi->ckpt); return -EINVAL; @@ -812,7 +846,9 @@ static void __add_dirty_inode(struct inode *inode, enum inode_type type) return; set_inode_flag(inode, flag); - list_add_tail(&F2FS_I(inode)->dirty_list, &sbi->inode_list[type]); + if (!f2fs_is_volatile_file(inode)) + list_add_tail(&F2FS_I(inode)->dirty_list, + &sbi->inode_list[type]); stat_inc_dirty_inode(sbi, type); } @@ -870,6 +906,7 @@ int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) struct inode *inode; struct f2fs_inode_info *fi; bool is_dir = (type == DIR_INODE); + unsigned long ino = 0; trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir, get_pages(sbi, is_dir ? @@ -888,18 +925,34 @@ retry: F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA)); return 0; } - fi = list_entry(head->next, struct f2fs_inode_info, dirty_list); + fi = list_first_entry(head, struct f2fs_inode_info, dirty_list); inode = igrab(&fi->vfs_inode); spin_unlock(&sbi->inode_lock[type]); if (inode) { + unsigned long cur_ino = inode->i_ino; + + if (is_dir) + F2FS_I(inode)->cp_task = current; + filemap_fdatawrite(inode->i_mapping); + + if (is_dir) + F2FS_I(inode)->cp_task = NULL; + iput(inode); + /* We need to give cpu to another writers. */ + if (ino == cur_ino) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + cond_resched(); + } else { + ino = cur_ino; + } } else { /* * We should submit bio, since it exists several * wribacking dentry pages in the freeing inode. */ - f2fs_submit_merged_bio(sbi, DATA, WRITE); + f2fs_submit_merged_write(sbi, DATA); cond_resched(); } goto retry; @@ -921,18 +974,35 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi) spin_unlock(&sbi->inode_lock[DIRTY_META]); return 0; } - fi = list_entry(head->next, struct f2fs_inode_info, + fi = list_first_entry(head, struct f2fs_inode_info, gdirty_list); inode = igrab(&fi->vfs_inode); spin_unlock(&sbi->inode_lock[DIRTY_META]); if (inode) { - update_inode_page(inode); + sync_inode_metadata(inode, 0); + + /* it's on eviction */ + if (is_inode_flag_set(inode, FI_DIRTY_INODE)) + update_inode_page(inode); iput(inode); } }; return 0; } +static void __prepare_cp_block(struct f2fs_sb_info *sbi) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct f2fs_nm_info *nm_i = NM_I(sbi); + nid_t last_nid = nm_i->next_scan_nid; + + next_free_nid(sbi, &last_nid); + ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi)); + ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi)); + ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi)); + ckpt->next_free_nid = cpu_to_le32(last_nid); +} + /* * Freeze all the FS-operations for checkpoint. */ @@ -956,33 +1026,47 @@ retry_flush_dents: err = sync_dirty_inodes(sbi, DIR_INODE); if (err) goto out; - goto retry_flush_dents; - } - - if (get_pages(sbi, F2FS_DIRTY_IMETA)) { - f2fs_unlock_all(sbi); - err = f2fs_sync_inode_meta(sbi); - if (err) - goto out; + cond_resched(); goto retry_flush_dents; } /* * POR: we should ensure that there are no dirty node pages - * until finishing nat/sit flush. + * until finishing nat/sit flush. inode->i_blocks can be updated. */ + down_write(&sbi->node_change); + + if (get_pages(sbi, F2FS_DIRTY_IMETA)) { + up_write(&sbi->node_change); + f2fs_unlock_all(sbi); + err = f2fs_sync_inode_meta(sbi); + if (err) + goto out; + cond_resched(); + goto retry_flush_dents; + } + retry_flush_nodes: down_write(&sbi->node_write); if (get_pages(sbi, F2FS_DIRTY_NODES)) { up_write(&sbi->node_write); - err = sync_node_pages(sbi, &wbc); + err = sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO); if (err) { + up_write(&sbi->node_change); f2fs_unlock_all(sbi); goto out; } + cond_resched(); goto retry_flush_nodes; } + + /* + * sbi->node_change is used only for AIO write_begin path which produces + * dirty node blocks and some checkpoint values by block allocation. + */ + __prepare_cp_block(sbi); + up_write(&sbi->node_change); out: blk_finish_plug(&plug); return err; @@ -991,8 +1075,6 @@ out: static void unblock_operations(struct f2fs_sb_info *sbi) { up_write(&sbi->node_write); - - build_free_nids(sbi); f2fs_unlock_all(sbi); } @@ -1003,7 +1085,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) for (;;) { prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE); - if (!atomic_read(&sbi->nr_wb_bios)) + if (!get_pages(sbi, F2FS_WB_CP_DATA)) break; io_schedule_timeout(5*HZ); @@ -1015,15 +1097,24 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) { unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num; struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + unsigned long flags; - spin_lock(&sbi->cp_lock); + spin_lock_irqsave(&sbi->cp_lock, flags); - if (cpc->reason == CP_UMOUNT) + if ((cpc->reason & CP_UMOUNT) && + le32_to_cpu(ckpt->cp_pack_total_block_count) > + sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks) + disable_nat_bits(sbi, false); + + if (cpc->reason & CP_TRIMMED) + __set_ckpt_flags(ckpt, CP_TRIMMED_FLAG); + + if (cpc->reason & CP_UMOUNT) __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); else __clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); - if (cpc->reason == CP_FASTBOOT) + if (cpc->reason & CP_FASTBOOT) __set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); else __clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); @@ -1039,15 +1130,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* set this flag to activate crc|cp_ver for recovery */ __set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG); - spin_unlock(&sbi->cp_lock); + spin_unlock_irqrestore(&sbi->cp_lock, flags); } static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct f2fs_nm_info *nm_i = NM_I(sbi); - unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num; - nid_t last_nid = nm_i->next_scan_nid; + unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num, flags; block_t start_blk; unsigned int data_sum_blocks, orphan_blocks; __u32 crc32 = 0; @@ -1059,19 +1149,16 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* Flush all the NAT/SIT pages */ while (get_pages(sbi, F2FS_DIRTY_META)) { - sync_meta_pages(sbi, META, LONG_MAX); + sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); if (unlikely(f2fs_cp_error(sbi))) return -EIO; } - next_free_nid(sbi, &last_nid); - /* * modify checkpoint * version number is already updated */ ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi)); - ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi)); ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { ckpt->cur_node_segno[i] = @@ -1090,18 +1177,14 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) curseg_alloc_type(sbi, i + CURSEG_HOT_DATA); } - ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi)); - ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi)); - ckpt->next_free_nid = cpu_to_le32(last_nid); - /* 2 cp + n data seg summary + orphan inode blocks */ data_sum_blocks = npages_for_summary_flush(sbi, false); - spin_lock(&sbi->cp_lock); + spin_lock_irqsave(&sbi->cp_lock, flags); if (data_sum_blocks < NR_CURSEG_DATA_TYPE) __set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); else __clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); - spin_unlock(&sbi->cp_lock); + spin_unlock_irqrestore(&sbi->cp_lock, flags); orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num); ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + @@ -1130,6 +1213,27 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) start_blk = __start_cp_next_addr(sbi); + /* write nat bits */ + if (enabled_nat_bits(sbi, cpc)) { + __u64 cp_ver = cur_cp_version(ckpt); + block_t blk; + + cp_ver |= ((__u64)crc32 << 32); + *(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver); + + blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks; + for (i = 0; i < nm_i->nat_bits_blocks; i++) + update_meta_page(sbi, nm_i->nat_bits + + (i << F2FS_BLKSIZE_BITS), blk + i); + + /* Flush all the NAT BITS pages */ + while (get_pages(sbi, F2FS_DIRTY_META)) { + sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + } + } + /* need to wait for end_io results */ wait_on_all_pages_writeback(sbi); if (unlikely(f2fs_cp_error(sbi))) @@ -1179,7 +1283,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) percpu_counter_set(&sbi->alloc_valid_block_count, 0); /* Here, we only have one bio having CP pack */ - sync_meta_pages(sbi, META_FLUSH, LONG_MAX); + sync_meta_pages(sbi, META_FLUSH, LONG_MAX, FS_CP_META_IO); /* wait for previous submitted meta pages writeback */ wait_on_all_pages_writeback(sbi); @@ -1189,7 +1293,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (unlikely(f2fs_cp_error(sbi))) return -EIO; - clear_prefree_segments(sbi, cpc); clear_sbi_flag(sbi, SBI_IS_DIRTY); clear_sbi_flag(sbi, SBI_NEED_CP); __set_cp_next_pack(sbi); @@ -1219,8 +1322,8 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) mutex_lock(&sbi->cp_mutex); if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && - (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC || - (cpc->reason == CP_DISCARD && !sbi->discard_blks))) + ((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) || + ((cpc->reason & CP_DISCARD) && !sbi->discard_blks))) goto out; if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; @@ -1239,18 +1342,23 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops"); - f2fs_flush_merged_bios(sbi); + f2fs_flush_merged_writes(sbi); /* this is the case of multiple fstrims without any changes */ - if (cpc->reason == CP_DISCARD && !is_sbi_flag_set(sbi, SBI_IS_DIRTY)) { - f2fs_bug_on(sbi, NM_I(sbi)->dirty_nat_cnt); - f2fs_bug_on(sbi, SIT_I(sbi)->dirty_sentries); - f2fs_bug_on(sbi, prefree_segments(sbi)); - flush_sit_entries(sbi, cpc); - clear_prefree_segments(sbi, cpc); - f2fs_wait_all_discard_bio(sbi); - unblock_operations(sbi); - goto out; + if (cpc->reason & CP_DISCARD) { + if (!exist_trim_candidates(sbi, cpc)) { + unblock_operations(sbi); + goto out; + } + + if (NM_I(sbi)->dirty_nat_cnt == 0 && + SIT_I(sbi)->dirty_sentries == 0 && + prefree_segments(sbi) == 0) { + flush_sit_entries(sbi, cpc); + clear_prefree_segments(sbi, cpc); + unblock_operations(sbi); + goto out; + } } /* @@ -1262,18 +1370,20 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver); /* write cached NAT/SIT entries to NAT/SIT area */ - flush_nat_entries(sbi); + flush_nat_entries(sbi, cpc); flush_sit_entries(sbi, cpc); /* unlock all the fs_lock[] in do_checkpoint() */ err = do_checkpoint(sbi, cpc); - - f2fs_wait_all_discard_bio(sbi); + if (err) + release_discard_addrs(sbi); + else + clear_prefree_segments(sbi, cpc); unblock_operations(sbi); stat_inc_cp_count(sbi->stat_info); - if (cpc->reason == CP_RECOVERY) + if (cpc->reason & CP_RECOVERY) f2fs_msg(sbi->sb, KERN_NOTICE, "checkpoint: version = %llx", ckpt_ver); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 2c5ae0b41ada..16f86668fa4e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -30,14 +30,36 @@ #include #include +static bool __is_cp_guaranteed(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct inode *inode; + struct f2fs_sb_info *sbi; + + if (!mapping) + return false; + + inode = mapping->host; + sbi = F2FS_I_SB(inode); + + if (inode->i_ino == F2FS_META_INO(sbi) || + inode->i_ino == F2FS_NODE_INO(sbi) || + S_ISDIR(inode->i_mode) || + is_cold_data(page)) + return true; + return false; +} + static void f2fs_read_end_io(struct bio *bio) { struct bio_vec *bvec; int i; #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) + if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) { + f2fs_show_injection_info(FAULT_IO); bio->bi_error = -EIO; + } #endif if (f2fs_bio_encrypted(bio)) { @@ -72,6 +94,18 @@ static void f2fs_write_end_io(struct bio *bio) bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; + enum count_type type = WB_DATA_TYPE(page); + + if (IS_DUMMY_WRITTEN_PAGE(page)) { + set_page_private(page, (unsigned long)NULL); + ClearPagePrivate(page); + unlock_page(page); + mempool_free(page, sbi->write_io_dummy); + + if (unlikely(bio->bi_error)) + f2fs_stop_checkpoint(sbi, true); + continue; + } fscrypt_pullback_bio_page(&page, true); @@ -79,15 +113,57 @@ static void f2fs_write_end_io(struct bio *bio) mapping_set_error(page->mapping, -EIO); f2fs_stop_checkpoint(sbi, true); } + dec_page_count(sbi, type); + clear_cold_data(page); end_page_writeback(page); } - if (atomic_dec_and_test(&sbi->nr_wb_bios) && + if (!get_pages(sbi, F2FS_WB_CP_DATA) && wq_has_sleeper(&sbi->cp_wait)) wake_up(&sbi->cp_wait); bio_put(bio); } +/* + * Return true, if pre_bio's bdev is same as its target device. + */ +struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, + block_t blk_addr, struct bio *bio) +{ + struct block_device *bdev = sbi->sb->s_bdev; + int i; + + for (i = 0; i < sbi->s_ndevs; i++) { + if (FDEV(i).start_blk <= blk_addr && + FDEV(i).end_blk >= blk_addr) { + blk_addr -= FDEV(i).start_blk; + bdev = FDEV(i).bdev; + break; + } + } + if (bio) { + bio->bi_bdev = bdev; + bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr); + } + return bdev; +} + +int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr) +{ + int i; + + for (i = 0; i < sbi->s_ndevs; i++) + if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr) + return i; + return 0; +} + +static bool __same_bdev(struct f2fs_sb_info *sbi, + block_t blk_addr, struct bio *bio) +{ + return f2fs_target_device(sbi, blk_addr, NULL) == bio->bi_bdev; +} + /* * Low-level block read/write IO operations. */ @@ -98,8 +174,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, bio = f2fs_bio_alloc(npages); - bio->bi_bdev = sbi->sb->s_bdev; - bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr); + f2fs_target_device(sbi, blk_addr, bio); bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; bio->bi_private = is_read ? NULL : sbi; @@ -110,11 +185,46 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, struct bio *bio, enum page_type type) { if (!is_read_io(bio_op(bio))) { - atomic_inc(&sbi->nr_wb_bios); - if (f2fs_sb_mounted_hmsmr(sbi->sb) && + unsigned int start; + + if (f2fs_sb_mounted_blkzoned(sbi->sb) && current->plug && (type == DATA || type == NODE)) blk_finish_plug(current->plug); + + if (type != DATA && type != NODE) + goto submit_io; + + start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS; + start %= F2FS_IO_SIZE(sbi); + + if (start == 0) + goto submit_io; + + /* fill dummy pages */ + for (; start < F2FS_IO_SIZE(sbi); start++) { + struct page *page = + mempool_alloc(sbi->write_io_dummy, + GFP_NOIO | __GFP_ZERO | __GFP_NOFAIL); + f2fs_bug_on(sbi, !page); + + SetPagePrivate(page); + set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE); + lock_page(page); + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) + f2fs_bug_on(sbi, 1); + } + /* + * In the NODE case, we lose next block address chain. So, we + * need to do checkpoint in f2fs_sync_file. + */ + if (type == NODE) + set_sbi_flag(sbi, SBI_NEED_CP); } +submit_io: + if (is_read_io(bio_op(bio))) + trace_f2fs_submit_read_bio(sbi->sb, type, bio); + else + trace_f2fs_submit_write_bio(sbi->sb, type, bio); submit_bio(bio); } @@ -125,19 +235,19 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) if (!io->bio) return; - if (is_read_io(fio->op)) - trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio); - else - trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio); - bio_set_op_attrs(io->bio, fio->op, fio->op_flags); + if (is_read_io(fio->op)) + trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio); + else + trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio); + __submit_bio(io->sbi, io->bio, fio->type); io->bio = NULL; } -static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, - struct page *page, nid_t ino) +static bool __has_merged_page(struct f2fs_bio_info *io, + struct inode *inode, nid_t ino, pgoff_t idx) { struct bio_vec *bvec; struct page *target; @@ -146,7 +256,7 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, if (!io->bio) return false; - if (!inode && !page && !ino) + if (!inode && !ino) return true; bio_for_each_segment_all(bvec, io->bio, i) { @@ -156,10 +266,11 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, else target = fscrypt_control_page(bvec->bv_page); + if (idx != target->index) + continue; + if (inode && inode == target->mapping->host) return true; - if (page && page == target) - return true; if (ino && ino == ino_of_node(target)) return true; } @@ -168,72 +279,88 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, } static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode, - struct page *page, nid_t ino, - enum page_type type) + nid_t ino, pgoff_t idx, enum page_type type) { enum page_type btype = PAGE_TYPE_OF_BIO(type); - struct f2fs_bio_info *io = &sbi->write_io[btype]; - bool ret; + enum temp_type temp; + struct f2fs_bio_info *io; + bool ret = false; - down_read(&io->io_rwsem); - ret = __has_merged_page(io, inode, page, ino); - up_read(&io->io_rwsem); + for (temp = HOT; temp < NR_TEMP_TYPE; temp++) { + io = sbi->write_io[btype] + temp; + + down_read(&io->io_rwsem); + ret = __has_merged_page(io, inode, ino, idx); + up_read(&io->io_rwsem); + + /* TODO: use HOT temp only for meta pages now. */ + if (ret || btype == META) + break; + } return ret; } -static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, - struct inode *inode, struct page *page, - nid_t ino, enum page_type type, int rw) +static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, + enum page_type type, enum temp_type temp) { enum page_type btype = PAGE_TYPE_OF_BIO(type); - struct f2fs_bio_info *io; - - io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype]; + struct f2fs_bio_info *io = sbi->write_io[btype] + temp; down_write(&io->io_rwsem); - if (!__has_merged_page(io, inode, page, ino)) - goto out; - /* change META to META_FLUSH in the checkpoint procedure */ if (type >= META_FLUSH) { io->fio.type = META_FLUSH; io->fio.op = REQ_OP_WRITE; - if (test_opt(sbi, NOBARRIER)) - io->fio.op_flags = WRITE_FLUSH | REQ_META | REQ_PRIO; - else - io->fio.op_flags = WRITE_FLUSH_FUA | REQ_META | - REQ_PRIO; + io->fio.op_flags = REQ_META | REQ_PRIO | REQ_SYNC; + if (!test_opt(sbi, NOBARRIER)) + io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA; } __submit_merged_bio(io); -out: up_write(&io->io_rwsem); } -void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type, - int rw) +static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, + struct inode *inode, nid_t ino, pgoff_t idx, + enum page_type type, bool force) { - __f2fs_submit_merged_bio(sbi, NULL, NULL, 0, type, rw); + enum temp_type temp; + + if (!force && !has_merged_page(sbi, inode, ino, idx, type)) + return; + + for (temp = HOT; temp < NR_TEMP_TYPE; temp++) { + + __f2fs_submit_merged_write(sbi, type, temp); + + /* TODO: use HOT temp only for meta pages now. */ + if (type >= META) + break; + } } -void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi, - struct inode *inode, struct page *page, - nid_t ino, enum page_type type, int rw) +void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type) { - if (has_merged_page(sbi, inode, page, ino, type)) - __f2fs_submit_merged_bio(sbi, inode, page, ino, type, rw); + __submit_merged_write_cond(sbi, NULL, 0, 0, type, true); } -void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi) +void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, + struct inode *inode, nid_t ino, pgoff_t idx, + enum page_type type) { - f2fs_submit_merged_bio(sbi, DATA, WRITE); - f2fs_submit_merged_bio(sbi, NODE, WRITE); - f2fs_submit_merged_bio(sbi, META, WRITE); + __submit_merged_write_cond(sbi, inode, ino, idx, type, false); +} + +void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi) +{ + f2fs_submit_merged_write(sbi, DATA); + f2fs_submit_merged_write(sbi, NODE); + f2fs_submit_merged_write(sbi, META); } /* * Fill the locked page with data located in the block address. - * Return unlocked page. + * A caller needs to unlock the page on failure. */ int f2fs_submit_page_bio(struct f2fs_io_info *fio) { @@ -254,41 +381,65 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) bio_set_op_attrs(bio, fio->op, fio->op_flags); __submit_bio(fio->sbi, bio, fio->type); + + if (!is_read_io(fio->op)) + inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page)); return 0; } -void f2fs_submit_page_mbio(struct f2fs_io_info *fio) +int f2fs_submit_page_write(struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); - struct f2fs_bio_info *io; - bool is_read = is_read_io(fio->op); + struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp; struct page *bio_page; + int err = 0; - io = is_read ? &sbi->read_io : &sbi->write_io[btype]; + f2fs_bug_on(sbi, is_read_io(fio->op)); + + down_write(&io->io_rwsem); +next: + if (fio->in_list) { + spin_lock(&io->io_lock); + if (list_empty(&io->io_list)) { + spin_unlock(&io->io_lock); + goto out_fail; + } + fio = list_first_entry(&io->io_list, + struct f2fs_io_info, list); + list_del(&fio->list); + spin_unlock(&io->io_lock); + } if (fio->old_blkaddr != NEW_ADDR) verify_block_addr(sbi, fio->old_blkaddr); verify_block_addr(sbi, fio->new_blkaddr); - down_write(&io->io_rwsem); + bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page; + + /* set submitted = 1 as a return value */ + fio->submitted = 1; + + inc_page_count(sbi, WB_DATA_TYPE(bio_page)); if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 || - (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags))) + (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) || + !__same_bdev(sbi, fio->new_blkaddr, io->bio))) __submit_merged_bio(io); alloc_new: if (io->bio == NULL) { - int bio_blocks = MAX_BIO_BLOCKS(sbi); - + if ((fio->type == DATA || fio->type == NODE) && + fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) { + err = -EAGAIN; + dec_page_count(sbi, WB_DATA_TYPE(bio_page)); + goto out_fail; + } io->bio = __bio_alloc(sbi, fio->new_blkaddr, - bio_blocks, is_read); + BIO_MAX_PAGES, false); io->fio = *fio; } - bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page; - - if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < - PAGE_SIZE) { + if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) { __submit_merged_bio(io); goto alloc_new; } @@ -296,18 +447,74 @@ alloc_new: io->last_block_in_bio = fio->new_blkaddr; f2fs_trace_ios(fio, 0); + trace_f2fs_submit_page_write(fio->page, fio); + + if (fio->in_list) + goto next; +out_fail: up_write(&io->io_rwsem); - trace_f2fs_submit_page_mbio(fio->page, fio); + return err; +} + +static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, + unsigned nr_pages) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct fscrypt_ctx *ctx = NULL; + struct bio *bio; + + if (f2fs_encrypted_file(inode)) { + ctx = fscrypt_get_ctx(inode, GFP_NOFS); + if (IS_ERR(ctx)) + return ERR_CAST(ctx); + + /* wait the page to be moved by cleaning */ + f2fs_wait_on_block_writeback(sbi, blkaddr); + } + + bio = bio_alloc(GFP_KERNEL, min_t(int, nr_pages, BIO_MAX_PAGES)); + if (!bio) { + if (ctx) + fscrypt_release_ctx(ctx); + return ERR_PTR(-ENOMEM); + } + f2fs_target_device(sbi, blkaddr, bio); + bio->bi_end_io = f2fs_read_end_io; + bio->bi_private = ctx; + bio_set_op_attrs(bio, REQ_OP_READ, 0); + + return bio; +} + +/* This can handle encryption stuffs */ +static int f2fs_submit_page_read(struct inode *inode, struct page *page, + block_t blkaddr) +{ + struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1); + + if (IS_ERR(bio)) + return PTR_ERR(bio); + + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { + bio_put(bio); + return -EFAULT; + } + __submit_bio(F2FS_I_SB(inode), bio, DATA); + return 0; } static void __set_data_blkaddr(struct dnode_of_data *dn) { struct f2fs_node *rn = F2FS_NODE(dn->node_page); __le32 *addr_array; + int base = 0; + + if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode)) + base = get_extra_isize(dn->inode); /* Get physical address of data block */ addr_array = blkaddr_in_node(rn); - addr_array[dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr); + addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr); } /* @@ -335,14 +542,15 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); + int err; if (!count) return 0; if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; - if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count))) - return -ENOSPC; + if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) + return err; trace_f2fs_reserve_new_blocks(dn->inode, dn->nid, dn->ofs_in_node, count); @@ -350,8 +558,8 @@ int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) f2fs_wait_on_page_writeback(dn->node_page, NODE, true); for (; count > 0; dn->ofs_in_node++) { - block_t blkaddr = - datablock_addr(dn->node_page, dn->ofs_in_node); + block_t blkaddr = datablock_addr(dn->inode, + dn->node_page, dn->ofs_in_node); if (blkaddr == NULL_ADDR) { dn->data_blkaddr = NEW_ADDR; __set_data_blkaddr(dn); @@ -393,7 +601,7 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index) { - struct extent_info ei; + struct extent_info ei = {0,0,0}; struct inode *inode = dn->inode; if (f2fs_lookup_extent_cache(inode, index, &ei)) { @@ -410,18 +618,8 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index, struct address_space *mapping = inode->i_mapping; struct dnode_of_data dn; struct page *page; - struct extent_info ei; + struct extent_info ei = {0,0,0}; int err; - struct f2fs_io_info fio = { - .sbi = F2FS_I_SB(inode), - .type = DATA, - .op = REQ_OP_READ, - .op_flags = op_flags, - .encrypted_page = NULL, - }; - - if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) - return read_mapping_page(mapping, index, NULL); page = f2fs_grab_cache_page(mapping, index, for_write); if (!page) @@ -462,9 +660,7 @@ got_it: return page; } - fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; - fio.page = page; - err = f2fs_submit_page_bio(&fio); + err = f2fs_submit_page_read(inode, page, dn.data_blkaddr); if (err) goto put_err; return page; @@ -484,7 +680,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index) return page; f2fs_put_page(page, 0); - page = get_read_data_page(inode, index, READ_SYNC, false); + page = get_read_data_page(inode, index, 0, false); if (IS_ERR(page)) return page; @@ -510,7 +706,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index, struct address_space *mapping = inode->i_mapping; struct page *page; repeat: - page = get_read_data_page(inode, index, READ_SYNC, for_write); + page = get_read_data_page(inode, index, 0, for_write); if (IS_ERR(page)) return page; @@ -591,29 +787,27 @@ static int __allocate_data_block(struct dnode_of_data *dn) struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_summary sum; struct node_info ni; - int seg = CURSEG_WARM_DATA; pgoff_t fofs; blkcnt_t count = 1; + int err; if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; - dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node); + dn->data_blkaddr = datablock_addr(dn->inode, + dn->node_page, dn->ofs_in_node); if (dn->data_blkaddr == NEW_ADDR) goto alloc; - if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count))) - return -ENOSPC; + if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) + return err; alloc: get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); - if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page) - seg = CURSEG_DIRECT_IO; - allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr, - &sum, seg); + &sum, CURSEG_WARM_DATA, NULL, false); set_data_blkaddr(dn); /* update i_size */ @@ -625,11 +819,21 @@ alloc: return 0; } -ssize_t f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) +static inline bool __force_buffered_io(struct inode *inode, int rw) +{ + return (f2fs_encrypted_file(inode) || + (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) || + F2FS_I_SB(inode)->s_ndevs); +} + +int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); struct f2fs_map_blocks map; - ssize_t ret = 0; + int err = 0; + + if (is_inode_flag_set(inode, FI_NO_PREALLOC)) + return 0; map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos); map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from)); @@ -641,19 +845,37 @@ ssize_t f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) map.m_next_pgofs = NULL; if (iocb->ki_flags & IOCB_DIRECT) { - ret = f2fs_convert_inline_inode(inode); - if (ret) - return ret; - return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO); + err = f2fs_convert_inline_inode(inode); + if (err) + return err; + return f2fs_map_blocks(inode, &map, 1, + __force_buffered_io(inode, WRITE) ? + F2FS_GET_BLOCK_PRE_AIO : + F2FS_GET_BLOCK_PRE_DIO); } - if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA) { - ret = f2fs_convert_inline_inode(inode); - if (ret) - return ret; + if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) { + err = f2fs_convert_inline_inode(inode); + if (err) + return err; } if (!f2fs_has_inline_data(inode)) return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO); - return ret; + return err; +} + +static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) +{ + if (flag == F2FS_GET_BLOCK_PRE_AIO) { + if (lock) + down_read(&sbi->node_change); + else + up_read(&sbi->node_change); + } else { + if (lock) + f2fs_lock_op(sbi); + else + f2fs_unlock_op(sbi); + } } /* @@ -676,8 +898,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int err = 0, ofs = 1; unsigned int ofs_in_node, last_ofs_in_node; blkcnt_t prealloc; - struct extent_info ei; - bool allocated = false; + struct extent_info ei = {0,0,0}; block_t blkaddr; if (!maxblocks) @@ -699,7 +920,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, next_dnode: if (create) - f2fs_lock_op(sbi); + __do_map_lock(sbi, flag, true); /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); @@ -721,7 +942,7 @@ next_dnode: end_offset = ADDRS_PER_PAGE(dn.node_page, inode); next_block: - blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) { if (create) { @@ -736,14 +957,12 @@ next_block: } } else { err = __allocate_data_block(&dn); - if (!err) { + if (!err) set_inode_flag(inode, FI_APPEND_WRITE); - allocated = true; - } } if (err) goto sync_out; - map->m_flags = F2FS_MAP_NEW; + map->m_flags |= F2FS_MAP_NEW; blkaddr = dn.data_blkaddr; } else { if (flag == F2FS_GET_BLOCK_BMAP) { @@ -794,7 +1013,6 @@ skip: err = reserve_new_blocks(&dn, prealloc); if (err) goto sync_out; - allocated = dn.node_changed; map->m_len += dn.ofs_in_node - ofs_in_node; if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) { @@ -812,18 +1030,17 @@ skip: f2fs_put_dnode(&dn); if (create) { - f2fs_unlock_op(sbi); - f2fs_balance_fs(sbi, allocated); + __do_map_lock(sbi, flag, false); + f2fs_balance_fs(sbi, dn.node_changed); } - allocated = false; goto next_dnode; sync_out: f2fs_put_dnode(&dn); unlock_out: if (create) { - f2fs_unlock_op(sbi); - f2fs_balance_fs(sbi, allocated); + __do_map_lock(sbi, flag, false); + f2fs_balance_fs(sbi, dn.node_changed); } out: trace_f2fs_map_blocks(inode, map, err); @@ -835,19 +1052,19 @@ static int __get_data_block(struct inode *inode, sector_t iblock, pgoff_t *next_pgofs) { struct f2fs_map_blocks map; - int ret; + int err; map.m_lblk = iblock; map.m_len = bh->b_size >> inode->i_blkbits; map.m_next_pgofs = next_pgofs; - ret = f2fs_map_blocks(inode, &map, create, flag); - if (!ret) { + err = f2fs_map_blocks(inode, &map, create, flag); + if (!err) { map_bh(bh, inode->i_sb, map.m_pblk); bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags; - bh->b_size = map.m_len << inode->i_blkbits; + bh->b_size = (u64)map.m_len << inode->i_blkbits; } - return ret; + return err; } static int get_data_block(struct inode *inode, sector_t iblock, @@ -862,7 +1079,7 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { return __get_data_block(inode, iblock, bh_result, create, - F2FS_GET_BLOCK_DIO, NULL); + F2FS_GET_BLOCK_DEFAULT, NULL); } static int get_data_block_bmap(struct inode *inode, sector_t iblock, @@ -892,7 +1109,6 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, struct buffer_head map_bh; sector_t start_blk, last_blk; pgoff_t next_pgofs; - loff_t isize; u64 logical = 0, phys = 0, size = 0; u32 flags = 0; int ret = 0; @@ -909,13 +1125,6 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, inode_lock(inode); - isize = i_size_read(inode); - if (start >= isize) - goto out; - - if (start + len > isize) - len = isize - start; - if (logical_to_blk(inode, len) == 0) len = blk_to_logical(inode, 1); @@ -934,13 +1143,11 @@ next: /* HOLE */ if (!buffer_mapped(&map_bh)) { start_blk = next_pgofs; - /* Go through holes util pass the EOF */ - if (blk_to_logical(inode, start_blk) < isize) + + if (blk_to_logical(inode, start_blk) < blk_to_logical(inode, + F2FS_I_SB(inode)->max_file_blocks)) goto prep_next; - /* Found a hole beyond isize means no more extents. - * Note that the premise is that filesystems don't - * punch holes beyond isize and keep size unchanged. - */ + flags |= FIEMAP_EXTENT_LAST; } @@ -978,37 +1185,6 @@ out: return ret; } -static struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr, - unsigned nr_pages) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct fscrypt_ctx *ctx = NULL; - struct block_device *bdev = sbi->sb->s_bdev; - struct bio *bio; - - if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { - ctx = fscrypt_get_ctx(inode, GFP_NOFS); - if (IS_ERR(ctx)) - return ERR_CAST(ctx); - - /* wait the page to be moved by cleaning */ - f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr); - } - - bio = bio_alloc(GFP_KERNEL, min_t(int, nr_pages, BIO_MAX_PAGES)); - if (!bio) { - if (ctx) - fscrypt_release_ctx(ctx); - return ERR_PTR(-ENOMEM); - } - bio->bi_bdev = bdev; - bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blkaddr); - bio->bi_end_io = f2fs_read_end_io; - bio->bi_private = ctx; - - return bio; -} - /* * This function was originally taken from fs/mpage.c, and customized for f2fs. * Major change was from block_size == page_size in f2fs by default. @@ -1037,9 +1213,10 @@ static int f2fs_mpage_readpages(struct address_space *mapping, for (page_idx = 0; nr_pages; page_idx++, nr_pages--) { - prefetchw(&page->flags); if (pages) { - page = list_entry(pages->prev, struct page, lru); + page = list_last_entry(pages, struct page, lru); + + prefetchw(&page->flags); list_del(&page->lru); if (add_to_page_cache_lru(page, mapping, page->index, @@ -1073,7 +1250,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping, map.m_len = last_block - block_in_file; if (f2fs_map_blocks(inode, &map, 0, - F2FS_GET_BLOCK_READ)) + F2FS_GET_BLOCK_DEFAULT)) goto set_error_page; } got_it: @@ -1097,18 +1274,18 @@ got_it: * This page will go to BIO. Do we need to send this * BIO off first? */ - if (bio && (last_block_in_bio != block_nr - 1)) { + if (bio && (last_block_in_bio != block_nr - 1 || + !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) { submit_and_realloc: __submit_bio(F2FS_I_SB(inode), bio, DATA); bio = NULL; } if (bio == NULL) { - bio = f2fs_grab_bio(inode, block_nr, nr_pages); + bio = f2fs_grab_read_bio(inode, block_nr, nr_pages); if (IS_ERR(bio)) { bio = NULL; goto set_error_page; } - bio_set_op_attrs(bio, REQ_OP_READ, 0); } if (bio_add_page(bio, page, blocksize, 0) < blocksize) @@ -1157,7 +1334,7 @@ static int f2fs_read_data_pages(struct file *file, struct list_head *pages, unsigned nr_pages) { struct inode *inode = file->f_mapping->host; - struct page *page = list_entry(pages->prev, struct page, lru); + struct page *page = list_last_entry(pages, struct page, lru); trace_f2fs_readpages(inode, page, nr_pages); @@ -1168,17 +1345,84 @@ static int f2fs_read_data_pages(struct file *file, return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages); } +static int encrypt_one_page(struct f2fs_io_info *fio) +{ + struct inode *inode = fio->page->mapping->host; + gfp_t gfp_flags = GFP_NOFS; + + if (!f2fs_encrypted_file(inode)) + return 0; + + /* wait for GCed encrypted page writeback */ + f2fs_wait_on_block_writeback(fio->sbi, fio->old_blkaddr); + +retry_encrypt: + fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page, + PAGE_SIZE, 0, fio->page->index, gfp_flags); + if (!IS_ERR(fio->encrypted_page)) + return 0; + + /* flush pending IOs and wait for a while in the ENOMEM case */ + if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { + f2fs_flush_merged_writes(fio->sbi); + congestion_wait(BLK_RW_ASYNC, HZ/50); + gfp_flags |= __GFP_NOFAIL; + goto retry_encrypt; + } + return PTR_ERR(fio->encrypted_page); +} + +static inline bool need_inplace_update(struct f2fs_io_info *fio) +{ + struct inode *inode = fio->page->mapping->host; + + if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode)) + return false; + if (is_cold_data(fio->page)) + return false; + if (IS_ATOMIC_WRITTEN_PAGE(fio->page)) + return false; + + return need_inplace_update_policy(inode, fio); +} + +static inline bool valid_ipu_blkaddr(struct f2fs_io_info *fio) +{ + if (fio->old_blkaddr == NEW_ADDR) + return false; + if (fio->old_blkaddr == NULL_ADDR) + return false; + return true; +} + int do_write_data_page(struct f2fs_io_info *fio) { struct page *page = fio->page; struct inode *inode = page->mapping->host; struct dnode_of_data dn; + struct extent_info ei = {0,0,0}; + bool ipu_force = false; int err = 0; set_new_dnode(&dn, inode, NULL, NULL, 0); + if (need_inplace_update(fio) && + f2fs_lookup_extent_cache(inode, page->index, &ei)) { + fio->old_blkaddr = ei.blk + page->index - ei.fofs; + + if (valid_ipu_blkaddr(fio)) { + ipu_force = true; + fio->need_lock = LOCK_DONE; + goto got_it; + } + } + + /* Deadlock due to between page->lock and f2fs_lock_op */ + if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi)) + return -EAGAIN; + err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE); if (err) - return err; + goto out; fio->old_blkaddr = dn.data_blkaddr; @@ -1187,57 +1431,57 @@ int do_write_data_page(struct f2fs_io_info *fio) ClearPageUptodate(page); goto out_writepage; } - - if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { - gfp_t gfp_flags = GFP_NOFS; - - /* wait for GCed encrypted page writeback */ - f2fs_wait_on_encrypted_page_writeback(F2FS_I_SB(inode), - fio->old_blkaddr); -retry_encrypt: - fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page, - gfp_flags); - if (IS_ERR(fio->encrypted_page)) { - err = PTR_ERR(fio->encrypted_page); - if (err == -ENOMEM) { - /* flush pending ios and wait for a while */ - f2fs_flush_merged_bios(F2FS_I_SB(inode)); - congestion_wait(BLK_RW_ASYNC, HZ/50); - gfp_flags |= __GFP_NOFAIL; - err = 0; - goto retry_encrypt; - } - goto out_writepage; - } - } - - set_page_writeback(page); - +got_it: /* * If current allocation needs SSR, * it had better in-place writes for updated data. */ - if (unlikely(fio->old_blkaddr != NEW_ADDR && - !is_cold_data(page) && - !IS_ATOMIC_WRITTEN_PAGE(page) && - need_inplace_update(inode))) { - rewrite_data_page(fio); + if (ipu_force || (valid_ipu_blkaddr(fio) && need_inplace_update(fio))) { + err = encrypt_one_page(fio); + if (err) + goto out_writepage; + + set_page_writeback(page); + f2fs_put_dnode(&dn); + if (fio->need_lock == LOCK_REQ) + f2fs_unlock_op(fio->sbi); + err = rewrite_data_page(fio); + trace_f2fs_do_write_data_page(fio->page, IPU); set_inode_flag(inode, FI_UPDATE_WRITE); - trace_f2fs_do_write_data_page(page, IPU); - } else { - write_data_page(&dn, fio); - trace_f2fs_do_write_data_page(page, OPU); - set_inode_flag(inode, FI_APPEND_WRITE); - if (page->index == 0) - set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); + return err; } + + if (fio->need_lock == LOCK_RETRY) { + if (!f2fs_trylock_op(fio->sbi)) { + err = -EAGAIN; + goto out_writepage; + } + fio->need_lock = LOCK_REQ; + } + + err = encrypt_one_page(fio); + if (err) + goto out_writepage; + + set_page_writeback(page); + + /* LFS mode write path */ + write_data_page(&dn, fio); + trace_f2fs_do_write_data_page(page, OPU); + set_inode_flag(inode, FI_APPEND_WRITE); + if (page->index == 0) + set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); out_writepage: f2fs_put_dnode(&dn); +out: + if (fio->need_lock == LOCK_REQ) + f2fs_unlock_op(fio->sbi); return err; } -static int f2fs_write_data_page(struct page *page, - struct writeback_control *wbc) +static int __write_data_page(struct page *page, bool *submitted, + struct writeback_control *wbc, + enum iostat_type io_type) { struct inode *inode = page->mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -1252,13 +1496,20 @@ static int f2fs_write_data_page(struct page *page, .sbi = sbi, .type = DATA, .op = REQ_OP_WRITE, - .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0, + .op_flags = wbc_to_write_flags(wbc), + .old_blkaddr = NULL_ADDR, .page = page, .encrypted_page = NULL, + .submitted = false, + .need_lock = LOCK_RETRY, + .io_type = io_type, }; trace_f2fs_writepage(page, DATA); + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + goto redirty_out; + if (page->index < end_index) goto write; @@ -1272,8 +1523,6 @@ static int f2fs_write_data_page(struct page *page, zero_user_segment(page, offset, PAGE_SIZE); write: - if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) - goto redirty_out; if (f2fs_is_drop_cache(inode)) goto out; /* we should not write 0'th page having journal header */ @@ -1290,6 +1539,7 @@ write: /* Dentry blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode)) { + fio.need_lock = LOCK_DONE; err = do_write_data_page(&fio); goto done; } @@ -1298,52 +1548,78 @@ write: need_balance_fs = true; else if (has_not_enough_free_secs(sbi, 0, 0)) goto redirty_out; + else + set_inode_flag(inode, FI_HOT_DATA); err = -EAGAIN; - f2fs_lock_op(sbi); - if (f2fs_has_inline_data(inode)) + if (f2fs_has_inline_data(inode)) { err = f2fs_write_inline_data(inode, page); - if (err == -EAGAIN) + if (!err) + goto out; + } + + if (err == -EAGAIN) { err = do_write_data_page(&fio); + if (err == -EAGAIN) { + fio.need_lock = LOCK_REQ; + err = do_write_data_page(&fio); + } + } if (F2FS_I(inode)->last_disk_size < psize) F2FS_I(inode)->last_disk_size = psize; - f2fs_unlock_op(sbi); + done: if (err && err != -ENOENT) goto redirty_out; - clear_cold_data(page); out: inode_dec_dirty_pages(inode); if (err) ClearPageUptodate(page); if (wbc->for_reclaim) { - f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, DATA, WRITE); + f2fs_submit_merged_write_cond(sbi, inode, 0, page->index, DATA); + clear_inode_flag(inode, FI_HOT_DATA); remove_dirty_inode(inode); + submitted = NULL; } unlock_page(page); - f2fs_balance_fs(sbi, need_balance_fs); + if (!S_ISDIR(inode->i_mode)) + f2fs_balance_fs(sbi, need_balance_fs); - if (unlikely(f2fs_cp_error(sbi))) - f2fs_submit_merged_bio(sbi, DATA, WRITE); + if (unlikely(f2fs_cp_error(sbi))) { + f2fs_submit_merged_write(sbi, DATA); + submitted = NULL; + } + + if (submitted) + *submitted = fio.submitted; return 0; redirty_out: redirty_page_for_writepage(wbc, page); + if (!err) + return AOP_WRITEPAGE_ACTIVATE; unlock_page(page); return err; } +static int f2fs_write_data_page(struct page *page, + struct writeback_control *wbc) +{ + return __write_data_page(page, NULL, wbc, FS_DATA_IO); +} + /* * This function was copied from write_cche_pages from mm/page-writeback.c. * The major change is making write step of cold data page separately from * warm/hot data page. */ static int f2fs_write_cache_pages(struct address_space *mapping, - struct writeback_control *wbc) + struct writeback_control *wbc, + enum iostat_type io_type) { int ret = 0; int done = 0; @@ -1353,13 +1629,19 @@ static int f2fs_write_cache_pages(struct address_space *mapping, pgoff_t index; pgoff_t end; /* Inclusive */ pgoff_t done_index; + pgoff_t last_idx = ULONG_MAX; int cycled; int range_whole = 0; int tag; - int nwritten = 0; pagevec_init(&pvec, 0); + if (get_dirty_pages(mapping->host) <= + SM_I(F2FS_M_SB(mapping))->min_hot_blocks) + set_inode_flag(mapping->host, FI_HOT_DATA); + else + clear_inode_flag(mapping->host, FI_HOT_DATA); + if (wbc->range_cyclic) { writeback_index = mapping->writeback_index; /* prev offset */ index = writeback_index; @@ -1393,6 +1675,7 @@ retry: for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; + bool submitted = false; if (page->index > end) { done = 1; @@ -1400,7 +1683,7 @@ retry: } done_index = page->index; - +retry_write: lock_page(page); if (unlikely(page->mapping != mapping)) { @@ -1426,17 +1709,37 @@ continue_unlock: if (!clear_page_dirty_for_io(page)) goto continue_unlock; - ret = mapping->a_ops->writepage(page, wbc); + ret = __write_data_page(page, &submitted, wbc, io_type); if (unlikely(ret)) { + /* + * keep nr_to_write, since vfs uses this to + * get # of written pages. + */ + if (ret == AOP_WRITEPAGE_ACTIVATE) { + unlock_page(page); + ret = 0; + continue; + } else if (ret == -EAGAIN) { + ret = 0; + if (wbc->sync_mode == WB_SYNC_ALL) { + cond_resched(); + congestion_wait(BLK_RW_ASYNC, + HZ/50); + goto retry_write; + } + continue; + } done_index = page->index + 1; done = 1; break; - } else { - nwritten++; + } else if (submitted) { + last_idx = page->index; } - if (--wbc->nr_to_write <= 0 && - wbc->sync_mode == WB_SYNC_NONE) { + /* give a priority to WB_SYNC threads */ + if ((atomic_read(&F2FS_M_SB(mapping)->wb_sync_req) || + --wbc->nr_to_write <= 0) && + wbc->sync_mode == WB_SYNC_NONE) { done = 1; break; } @@ -1454,15 +1757,16 @@ continue_unlock: if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = done_index; - if (nwritten) - f2fs_submit_merged_bio_cond(F2FS_M_SB(mapping), mapping->host, - NULL, 0, DATA, WRITE); + if (last_idx != ULONG_MAX) + f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host, + 0, last_idx, DATA); return ret; } -static int f2fs_write_data_pages(struct address_space *mapping, - struct writeback_control *wbc) +int __f2fs_write_data_pages(struct address_space *mapping, + struct writeback_control *wbc, + enum iostat_type io_type) { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -1477,6 +1781,10 @@ static int f2fs_write_data_pages(struct address_space *mapping, if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE) return 0; + /* during POR, we don't need to trigger writepage at all. */ + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + goto skip_write; + if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE && get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) && available_free_memory(sbi, DIRTY_DENTS)) @@ -1486,15 +1794,20 @@ static int f2fs_write_data_pages(struct address_space *mapping, if (is_inode_flag_set(inode, FI_DO_DEFRAG)) goto skip_write; - /* during POR, we don't need to trigger writepage at all. */ - if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) - goto skip_write; - trace_f2fs_writepages(mapping->host, wbc, DATA); + /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */ + if (wbc->sync_mode == WB_SYNC_ALL) + atomic_inc(&sbi->wb_sync_req); + else if (atomic_read(&sbi->wb_sync_req)) + goto skip_write; + blk_start_plug(&plug); - ret = f2fs_write_cache_pages(mapping, wbc); + ret = f2fs_write_cache_pages(mapping, wbc, io_type); blk_finish_plug(&plug); + + if (wbc->sync_mode == WB_SYNC_ALL) + atomic_dec(&sbi->wb_sync_req); /* * if some pages were truncated, we cannot guarantee its mapping->host * to detect pending bios. @@ -1509,14 +1822,26 @@ skip_write: return 0; } +static int f2fs_write_data_pages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct inode *inode = mapping->host; + + return __f2fs_write_data_pages(mapping, wbc, + F2FS_I(inode)->cp_task == current ? + FS_CP_DATA_IO : FS_DATA_IO); +} + static void f2fs_write_failed(struct address_space *mapping, loff_t to) { struct inode *inode = mapping->host; loff_t i_size = i_size_read(inode); if (to > i_size) { + down_write(&F2FS_I(inode)->i_mmap_sem); truncate_pagecache(inode, i_size); truncate_blocks(inode, i_size, true); + up_write(&F2FS_I(inode)->i_mmap_sem); } } @@ -1529,19 +1854,20 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, struct dnode_of_data dn; struct page *ipage; bool locked = false; - struct extent_info ei; + struct extent_info ei = {0,0,0}; int err = 0; /* * we already allocated all the blocks, so we don't need to get * the block addresses when there is no need to fill the page. */ - if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE) + if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE && + !is_inode_flag_set(inode, FI_NO_PREALLOC)) return 0; if (f2fs_has_inline_data(inode) || (pos & PAGE_MASK) >= i_size_read(inode)) { - f2fs_lock_op(sbi); + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); locked = true; } restart: @@ -1555,7 +1881,7 @@ restart: set_new_dnode(&dn, inode, ipage, ipage, 0); if (f2fs_has_inline_data(inode)) { - if (pos + len <= MAX_INLINE_DATA) { + if (pos + len <= MAX_INLINE_DATA(inode)) { read_inline_data(page, ipage); set_inode_flag(inode, FI_DATA_EXIST); if (inode->i_nlink) @@ -1577,7 +1903,8 @@ restart: err = get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err || dn.data_blkaddr == NULL_ADDR) { f2fs_put_dnode(&dn); - f2fs_lock_op(sbi); + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, + true); locked = true; goto restart; } @@ -1591,7 +1918,7 @@ out: f2fs_put_dnode(&dn); unlock_out: if (locked) - f2fs_unlock_op(sbi); + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); return err; } @@ -1630,7 +1957,12 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, goto fail; } repeat: - page = grab_cache_page_write_begin(mapping, index, flags); + /* + * Do not use grab_cache_page_write_begin() to avoid deadlock due to + * wait_for_stable_page. Will wait that below with our IO control. + */ + page = pagecache_get_page(mapping, index, + FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS); if (!page) { err = -ENOMEM; goto fail; @@ -1657,31 +1989,24 @@ repeat: f2fs_wait_on_page_writeback(page, DATA, false); /* wait for GCed encrypted page writeback */ - if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) - f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr); + if (f2fs_encrypted_file(inode)) + f2fs_wait_on_block_writeback(sbi, blkaddr); if (len == PAGE_SIZE || PageUptodate(page)) return 0; + if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode)) { + zero_user_segment(page, len, PAGE_SIZE); + return 0; + } + if (blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_SIZE); SetPageUptodate(page); } else { - struct bio *bio; - - bio = f2fs_grab_bio(inode, blkaddr, 1); - if (IS_ERR(bio)) { - err = PTR_ERR(bio); + err = f2fs_submit_page_read(inode, page, blkaddr); + if (err) goto fail; - } - bio_set_op_attrs(bio, REQ_OP_READ, READ_SYNC); - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { - bio_put(bio); - err = -EFAULT; - goto fail; - } - - __submit_bio(sbi, bio, DATA); lock_page(page); if (unlikely(page->mapping != mapping)) { @@ -1717,7 +2042,7 @@ static int f2fs_write_end(struct file *file, * let generic_perform_write() try to copy data again through copied=0. */ if (!PageUptodate(page)) { - if (unlikely(copied != PAGE_SIZE)) + if (unlikely(copied != len)) copied = 0; else SetPageUptodate(page); @@ -1726,7 +2051,6 @@ static int f2fs_write_end(struct file *file, goto unlock_out; set_page_dirty(page); - clear_cold_data(page); if (pos + copied > i_size_read(inode)) f2fs_i_size_write(inode, pos + copied); @@ -1763,9 +2087,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (err) return err; - if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) - return 0; - if (test_opt(F2FS_I_SB(inode), LFS)) + if (__force_buffered_io(inode, rw)) return 0; trace_f2fs_direct_IO_enter(inode, offset, count, rw); @@ -1798,10 +2120,13 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) up_read(&F2FS_I(inode)->dio_rwsem[rw]); if (rw == WRITE) { - if (err > 0) + if (err > 0) { + f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO, + err); set_inode_flag(inode, FI_UPDATE_WRITE); - else if (err < 0) + } else if (err < 0) { f2fs_write_failed(mapping, offset + count); + } } if (trace_android_fs_dataread_start_enabled() && @@ -1827,17 +2152,19 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset, return; if (PageDirty(page)) { - if (inode->i_ino == F2FS_META_INO(sbi)) + if (inode->i_ino == F2FS_META_INO(sbi)) { dec_page_count(sbi, F2FS_DIRTY_META); - else if (inode->i_ino == F2FS_NODE_INO(sbi)) + } else if (inode->i_ino == F2FS_NODE_INO(sbi)) { dec_page_count(sbi, F2FS_DIRTY_NODES); - else + } else { inode_dec_dirty_pages(inode); + remove_dirty_inode(inode); + } } /* This is atomic written page, keep Private */ if (IS_ATOMIC_WRITTEN_PAGE(page)) - return; + return drop_inmem_page(inode, page); set_page_private(page, 0); ClearPagePrivate(page); @@ -1897,7 +2224,7 @@ static int f2fs_set_data_page_dirty(struct page *page) if (!PageUptodate(page)) SetPageUptodate(page); - if (f2fs_is_atomic_file(inode)) { + if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) { if (!IS_ATOMIC_WRITTEN_PAGE(page)) { register_inmem_page(inode, page); return 1; @@ -1944,8 +2271,12 @@ int f2fs_migrate_page(struct address_space *mapping, BUG_ON(PageWriteback(page)); /* migrating an atomic written page is safe with the inmem_lock hold */ - if (atomic_written && !mutex_trylock(&fi->inmem_lock)) - return -EAGAIN; + if (atomic_written) { + if (mode != MIGRATE_SYNC) + return -EBUSY; + if (!mutex_trylock(&fi->inmem_lock)) + return -EAGAIN; + } /* * A reference is expected if PagePrivate set when move mapping, diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 687998e9557c..87f449845f5f 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -50,7 +50,27 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ndirty_files = sbi->ndirty_inode[FILE_INODE]; si->ndirty_all = sbi->ndirty_inode[DIRTY_META]; si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); - si->wb_bios = atomic_read(&sbi->nr_wb_bios); + si->aw_cnt = atomic_read(&sbi->aw_cnt); + si->vw_cnt = atomic_read(&sbi->vw_cnt); + si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt); + si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt); + si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA); + si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA); + if (SM_I(sbi) && SM_I(sbi)->fcc_info) { + si->nr_flushed = + atomic_read(&SM_I(sbi)->fcc_info->issued_flush); + si->nr_flushing = + atomic_read(&SM_I(sbi)->fcc_info->issing_flush); + } + if (SM_I(sbi) && SM_I(sbi)->dcc_info) { + si->nr_discarded = + atomic_read(&SM_I(sbi)->dcc_info->issued_discard); + si->nr_discarding = + atomic_read(&SM_I(sbi)->dcc_info->issing_discard); + si->nr_discard_cmd = + atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt); + si->undiscard_blks = SM_I(sbi)->dcc_info->undiscard_blks; + } si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; si->rsvd_segs = reserved_segments(sbi); si->overp_segs = overprovision_segments(sbi); @@ -61,6 +81,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->inline_xattr = atomic_read(&sbi->inline_xattr); si->inline_inode = atomic_read(&sbi->inline_inode); si->inline_dir = atomic_read(&sbi->inline_dir); + si->append = sbi->im[APPEND_INO].ino_num; + si->update = sbi->im[UPDATE_INO].ino_num; si->orphans = sbi->im[ORPHAN_INO].ino_num; si->utilization = utilization(sbi); @@ -74,7 +96,9 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->dirty_nats = NM_I(sbi)->dirty_nat_cnt; si->sits = MAIN_SEGS(sbi); si->dirty_sits = SIT_I(sbi)->dirty_sentries; - si->fnids = NM_I(sbi)->fcnt; + si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID_LIST]; + si->avail_nids = NM_I(sbi)->available_nids; + si->alloc_nids = NM_I(sbi)->nid_cnt[ALLOC_NID_LIST]; si->bg_gc = sbi->bg_gc; si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg) @@ -87,8 +111,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) { struct curseg_info *curseg = CURSEG_I(sbi, i); si->curseg[i] = curseg->segno; - si->cursec[i] = curseg->segno / sbi->segs_per_sec; - si->curzone[i] = si->cursec[i] / sbi->secs_per_zone; + si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno); + si->curzone[i] = GET_ZONE_FROM_SEC(sbi, si->cursec[i]); } for (i = 0; i < 2; i++) { @@ -112,10 +136,10 @@ static void update_sit_info(struct f2fs_sb_info *sbi) bimodal = 0; total_vblocks = 0; - blks_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg; + blks_per_sec = BLKS_PER_SEC(sbi); hblks_per_sec = blks_per_sec / 2; for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { - vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); + vblocks = get_valid_blocks(sbi, segno, true); dist = abs(vblocks - hblks_per_sec); bimodal += dist * dist; @@ -144,7 +168,11 @@ static void update_mem_info(struct f2fs_sb_info *sbi) if (si->base_mem) goto get_cache; - si->base_mem = sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize; + /* build stat */ + si->base_mem = sizeof(struct f2fs_stat_info); + + /* build superblock */ + si->base_mem += sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize; si->base_mem += 2 * sizeof(struct f2fs_inode_info); si->base_mem += sizeof(*sbi->ckpt); si->base_mem += sizeof(struct percpu_counter) * NR_COUNT_TYPE; @@ -181,6 +209,10 @@ static void update_mem_info(struct f2fs_sb_info *sbi) /* build nm */ si->base_mem += sizeof(struct f2fs_nm_info); si->base_mem += __bitmap_size(sbi, NAT_BITMAP); + si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS); + si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE; + si->base_mem += NM_I(sbi)->nat_blocks / 8; + si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short); get_cache: si->cache_mem = 0; @@ -190,11 +222,18 @@ get_cache: si->cache_mem += sizeof(struct f2fs_gc_kthread); /* build merge flush thread */ - if (SM_I(sbi)->cmd_control_info) + if (SM_I(sbi)->fcc_info) si->cache_mem += sizeof(struct flush_cmd_control); + if (SM_I(sbi)->dcc_info) { + si->cache_mem += sizeof(struct discard_cmd_control); + si->cache_mem += sizeof(struct discard_cmd) * + atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt); + } /* free nids */ - si->cache_mem += NM_I(sbi)->fcnt * sizeof(struct free_nid); + si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] + + NM_I(sbi)->nid_cnt[ALLOC_NID_LIST]) * + sizeof(struct free_nid); si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry); si->cache_mem += NM_I(sbi)->dirty_nat_cnt * sizeof(struct nat_entry_set); @@ -250,8 +289,8 @@ static int stat_show(struct seq_file *s, void *v) si->inline_inode); seq_printf(s, " - Inline_dentry Inode: %u\n", si->inline_dir); - seq_printf(s, " - Orphan Inode: %u\n", - si->orphans); + seq_printf(s, " - Orphan/Append/Update Inode: %u, %u, %u\n", + si->orphans, si->append, si->update); seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", si->main_area_segs, si->main_area_sections, si->main_area_zones); @@ -310,8 +349,16 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", si->ext_tree, si->zombie_tree, si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); - seq_printf(s, " - inmem: %4d, wb_bios: %4d\n", - si->inmem_pages, si->wb_bios); + seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d), " + "Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n", + si->nr_wb_cp_data, si->nr_wb_data, + si->nr_flushing, si->nr_flushed, + si->nr_discarding, si->nr_discarded, + si->nr_discard_cmd, si->undiscard_blks); + seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d), " + "volatile IO: %4d (Max. %4d)\n", + si->inmem_pages, si->aw_cnt, si->max_aw_cnt, + si->vw_cnt, si->max_vw_cnt); seq_printf(s, " - nodes: %4d in %4d\n", si->ndirty_node, si->node_pages); seq_printf(s, " - dents: %4d in dirs:%4d (%4d)\n", @@ -324,8 +371,8 @@ static int stat_show(struct seq_file *s, void *v) si->ndirty_imeta); seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n", si->dirty_nats, si->nats, si->dirty_sits, si->sits); - seq_printf(s, " - free_nids: %9d\n", - si->fnids); + seq_printf(s, " - free_nids: %9d/%9d\n - alloc_nids: %9d\n", + si->free_nids, si->avail_nids, si->alloc_nids); seq_puts(s, "\nDistribution of User Blocks:"); seq_puts(s, " [ valid | invalid | free ]\n"); seq_puts(s, " ["); @@ -410,6 +457,11 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->inline_dir, 0); atomic_set(&sbi->inplace_count, 0); + atomic_set(&sbi->aw_cnt, 0); + atomic_set(&sbi->vw_cnt, 0); + atomic_set(&sbi->max_aw_cnt, 0); + atomic_set(&sbi->max_vw_cnt, 0); + mutex_lock(&f2fs_stat_mutex); list_add_tail(&si->stat_list, &f2fs_stat_list); mutex_unlock(&f2fs_stat_mutex); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 11f3717ce481..c0c933ad43c8 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -94,7 +94,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page); - make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); + make_dentry_ptr_block(NULL, &d, dentry_blk); de = find_target_dentry(fname, namehash, max_slots, &d); if (de) *res_page = dentry_page; @@ -111,8 +111,6 @@ struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname, struct f2fs_dir_entry *de; unsigned long bit_pos = 0; int max_len = 0; - struct fscrypt_str de_name = FSTR_INIT(NULL, 0); - struct fscrypt_str *name = &fname->disk_name; if (max_slots) *max_slots = 0; @@ -130,29 +128,11 @@ struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname, continue; } - if (de->hash_code != namehash) - goto not_match; - - de_name.name = d->filename[bit_pos]; - de_name.len = le16_to_cpu(de->name_len); - -#ifdef CONFIG_F2FS_FS_ENCRYPTION - if (unlikely(!name->name)) { - if (fname->usr_fname->name[0] == '_') { - if (de_name.len > 32 && - !memcmp(de_name.name + ((de_name.len - 17) & ~15), - fname->crypto_buf.name + 8, 16)) - goto found; - goto not_match; - } - name->name = fname->crypto_buf.name; - name->len = fname->crypto_buf.len; - } -#endif - if (de_name.len == name->len && - !memcmp(de_name.name, name->name, name->len)) + if (de->hash_code == namehash && + fscrypt_match_name(fname, d->filename[bit_pos], + le16_to_cpu(de->name_len))) goto found; -not_match: + if (max_slots && max_len > *max_slots) *max_slots = max_len; max_len = 0; @@ -212,13 +192,9 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, f2fs_put_page(dentry_page, 0); } - /* This is to increase the speed of f2fs_create */ - if (!de && room) { - F2FS_I(dir)->task = current; - if (F2FS_I(dir)->chash != namehash) { - F2FS_I(dir)->chash = namehash; - F2FS_I(dir)->clevel = level; - } + if (!de && room && F2FS_I(dir)->chash != namehash) { + F2FS_I(dir)->chash = namehash; + F2FS_I(dir)->clevel = level; } return de; @@ -259,6 +235,9 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, break; } out: + /* This is to increase the speed of f2fs_create */ + if (!de) + F2FS_I(dir)->task = current; return de; } @@ -277,7 +256,10 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, err = fscrypt_setup_filename(dir, child, 1, &fname); if (err) { - *res_page = ERR_PTR(err); + if (err == -ENOENT) + *res_page = NULL; + else + *res_page = ERR_PTR(err); return NULL; } @@ -322,7 +304,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, set_page_dirty(page); dir->i_mtime = dir->i_ctime = current_time(dir); - f2fs_mark_inode_dirty_sync(dir); + f2fs_mark_inode_dirty_sync(dir, false); f2fs_put_page(page, 1); } @@ -339,24 +321,6 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage) set_page_dirty(ipage); } -int update_dent_inode(struct inode *inode, struct inode *to, - const struct qstr *name) -{ - struct page *page; - - if (file_enc_name(to)) - return 0; - - page = get_node_page(F2FS_I_SB(inode), inode->i_ino); - if (IS_ERR(page)) - return PTR_ERR(page); - - init_dent_inode(name, page); - f2fs_put_page(page, 1); - - return 0; -} - void do_make_empty_dir(struct inode *inode, struct inode *parent, struct f2fs_dentry_ptr *d) { @@ -386,7 +350,7 @@ static int make_empty_dir(struct inode *inode, dentry_blk = kmap_atomic(dentry_page); - make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); + make_dentry_ptr_block(NULL, &d, dentry_blk); do_make_empty_dir(inode, parent, &d); kunmap_atomic(dentry_blk); @@ -440,15 +404,19 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, set_cold_node(inode, page); } - if (new_name) + if (new_name) { init_dent_inode(new_name, page); + if (f2fs_encrypted_inode(dir)) + file_set_enc_name(inode); + } /* * This file should be checkpointed during fsync. * We lost i_pino from now on. */ if (is_inode_flag_set(inode, FI_INC_LINK)) { - file_lost_pino(inode); + if (!S_ISDIR(inode->i_mode)) + file_lost_pino(inode); /* * If link the tmpfile to alias through linkat path, * we should remove this inode from orphan list. @@ -475,7 +443,7 @@ void update_parent_metadata(struct inode *dir, struct inode *inode, clear_inode_flag(inode, FI_NEW_INODE); } dir->i_mtime = dir->i_ctime = current_time(dir); - f2fs_mark_inode_dirty_sync(dir); + f2fs_mark_inode_dirty_sync(dir, false); if (F2FS_I(dir)->i_current_depth != current_depth) f2fs_i_depth_write(dir, current_depth); @@ -554,8 +522,10 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, start: #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) + if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) { + f2fs_show_injection_info(FAULT_DIR_DEPTH); return -ENOSPC; + } #endif if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) return -ENOSPC; @@ -599,11 +569,9 @@ add_dentry: err = PTR_ERR(page); goto fail; } - if (f2fs_encrypted_inode(dir)) - file_set_enc_name(inode); } - make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); + make_dentry_ptr_block(NULL, &d, dentry_blk); f2fs_update_dentry(ino, mode, &d, new_name, dentry_hash, bit_pos); set_page_dirty(dentry_page); @@ -737,6 +705,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, struct f2fs_dentry_block *dentry_blk; unsigned int bit_pos; int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); + struct address_space *mapping = page_mapping(page); + unsigned long flags; int i; f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); @@ -750,7 +720,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, dentry_blk = page_address(page); bit_pos = dentry - dentry_blk->dentry; for (i = 0; i < slots; i++) - clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + __clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); /* Let's check and deallocate this dentry page */ bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, @@ -760,17 +730,23 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, set_page_dirty(page); dir->i_ctime = dir->i_mtime = current_time(dir); - f2fs_mark_inode_dirty_sync(dir); + f2fs_mark_inode_dirty_sync(dir, false); if (inode) f2fs_drop_nlink(dir, inode); if (bit_pos == NR_DENTRY_IN_BLOCK && !truncate_hole(dir, page->index, page->index + 1)) { + spin_lock_irqsave(&mapping->tree_lock, flags); + radix_tree_tag_clear(&mapping->page_tree, page_index(page), + PAGECACHE_TAG_DIRTY); + spin_unlock_irqrestore(&mapping->tree_lock, flags); + clear_page_dirty_for_io(page); ClearPagePrivate(page); ClearPageUptodate(page); inode_dec_dirty_pages(dir); + remove_dirty_inode(dir); } f2fs_put_page(page, 1); } @@ -813,7 +789,7 @@ bool f2fs_empty_dir(struct inode *dir) return true; } -bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, +int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, unsigned int start_pos, struct fscrypt_str *fstr) { unsigned char d_type = DT_UNKNOWN; @@ -848,7 +824,7 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, (u32)de->hash_code, 0, &de_name, fstr); if (err) - return true; + return err; de_name = *fstr; fstr->len = save_len; @@ -856,12 +832,12 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, if (!dir_emit(ctx, de_name.name, de_name.len, le32_to_cpu(de->ino), d_type)) - return true; + return 1; bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); ctx->pos = start_pos + bit_pos; } - return false; + return 0; } static int f2fs_readdir(struct file *file, struct dir_context *ctx) @@ -900,17 +876,21 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) dentry_page = get_lock_data_page(inode, n, false); if (IS_ERR(dentry_page)) { err = PTR_ERR(dentry_page); - if (err == -ENOENT) + if (err == -ENOENT) { + err = 0; continue; - else + } else { goto out; + } } dentry_blk = kmap(dentry_page); - make_dentry_ptr(inode, &d, (void *)dentry_blk, 1); + make_dentry_ptr_block(inode, &d, dentry_blk); - if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr)) { + err = f2fs_fill_dentries(ctx, &d, + n * NR_DENTRY_IN_BLOCK, &fstr); + if (err) { kunmap(dentry_page); f2fs_put_page(dentry_page, 1); break; @@ -920,10 +900,9 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) kunmap(dentry_page); f2fs_put_page(dentry_page, 1); } - err = 0; out: fscrypt_fname_free_buffer(&fstr); - return err; + return err < 0 ? err : 0; } static int f2fs_dir_open(struct inode *inode, struct file *filp) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 7b32ce979fe1..ff2352a0ed15 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -18,6 +18,179 @@ #include "node.h" #include +static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re, + unsigned int ofs) +{ + if (cached_re) { + if (cached_re->ofs <= ofs && + cached_re->ofs + cached_re->len > ofs) { + return cached_re; + } + } + return NULL; +} + +static struct rb_entry *__lookup_rb_tree_slow(struct rb_root *root, + unsigned int ofs) +{ + struct rb_node *node = root->rb_node; + struct rb_entry *re; + + while (node) { + re = rb_entry(node, struct rb_entry, rb_node); + + if (ofs < re->ofs) + node = node->rb_left; + else if (ofs >= re->ofs + re->len) + node = node->rb_right; + else + return re; + } + return NULL; +} + +struct rb_entry *__lookup_rb_tree(struct rb_root *root, + struct rb_entry *cached_re, unsigned int ofs) +{ + struct rb_entry *re; + + re = __lookup_rb_tree_fast(cached_re, ofs); + if (!re) + return __lookup_rb_tree_slow(root, ofs); + + return re; +} + +struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, + struct rb_root *root, struct rb_node **parent, + unsigned int ofs) +{ + struct rb_node **p = &root->rb_node; + struct rb_entry *re; + + while (*p) { + *parent = *p; + re = rb_entry(*parent, struct rb_entry, rb_node); + + if (ofs < re->ofs) + p = &(*p)->rb_left; + else if (ofs >= re->ofs + re->len) + p = &(*p)->rb_right; + else + f2fs_bug_on(sbi, 1); + } + + return p; +} + +/* + * lookup rb entry in position of @ofs in rb-tree, + * if hit, return the entry, otherwise, return NULL + * @prev_ex: extent before ofs + * @next_ex: extent after ofs + * @insert_p: insert point for new extent at ofs + * in order to simpfy the insertion after. + * tree must stay unchanged between lookup and insertion. + */ +struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root, + struct rb_entry *cached_re, + unsigned int ofs, + struct rb_entry **prev_entry, + struct rb_entry **next_entry, + struct rb_node ***insert_p, + struct rb_node **insert_parent, + bool force) +{ + struct rb_node **pnode = &root->rb_node; + struct rb_node *parent = NULL, *tmp_node; + struct rb_entry *re = cached_re; + + *insert_p = NULL; + *insert_parent = NULL; + *prev_entry = NULL; + *next_entry = NULL; + + if (RB_EMPTY_ROOT(root)) + return NULL; + + if (re) { + if (re->ofs <= ofs && re->ofs + re->len > ofs) + goto lookup_neighbors; + } + + while (*pnode) { + parent = *pnode; + re = rb_entry(*pnode, struct rb_entry, rb_node); + + if (ofs < re->ofs) + pnode = &(*pnode)->rb_left; + else if (ofs >= re->ofs + re->len) + pnode = &(*pnode)->rb_right; + else + goto lookup_neighbors; + } + + *insert_p = pnode; + *insert_parent = parent; + + re = rb_entry(parent, struct rb_entry, rb_node); + tmp_node = parent; + if (parent && ofs > re->ofs) + tmp_node = rb_next(parent); + *next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node); + + tmp_node = parent; + if (parent && ofs < re->ofs) + tmp_node = rb_prev(parent); + *prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node); + return NULL; + +lookup_neighbors: + if (ofs == re->ofs || force) { + /* lookup prev node for merging backward later */ + tmp_node = rb_prev(&re->rb_node); + *prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node); + } + if (ofs == re->ofs + re->len - 1 || force) { + /* lookup next node for merging frontward later */ + tmp_node = rb_next(&re->rb_node); + *next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node); + } + return re; +} + +bool __check_rb_tree_consistence(struct f2fs_sb_info *sbi, + struct rb_root *root) +{ +#ifdef CONFIG_F2FS_CHECK_FS + struct rb_node *cur = rb_first(root), *next; + struct rb_entry *cur_re, *next_re; + + if (!cur) + return true; + + while (cur) { + next = rb_next(cur); + if (!next) + return true; + + cur_re = rb_entry(cur, struct rb_entry, rb_node); + next_re = rb_entry(next, struct rb_entry, rb_node); + + if (cur_re->ofs + cur_re->len > next_re->ofs) { + f2fs_msg(sbi->sb, KERN_INFO, "inconsistent rbtree, " + "cur(%u, %u) next(%u, %u)", + cur_re->ofs, cur_re->len, + next_re->ofs, next_re->len); + return false; + } + + cur = next; + } +#endif + return true; +} + static struct kmem_cache *extent_tree_slab; static struct kmem_cache *extent_node_slab; @@ -77,7 +250,7 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode) struct extent_tree *et; nid_t ino = inode->i_ino; - down_write(&sbi->extent_tree_lock); + mutex_lock(&sbi->extent_tree_lock); et = radix_tree_lookup(&sbi->extent_tree_root, ino); if (!et) { et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS); @@ -94,7 +267,7 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode) atomic_dec(&sbi->total_zombie_tree); list_del_init(&et->list); } - up_write(&sbi->extent_tree_lock); + mutex_unlock(&sbi->extent_tree_lock); /* never died until evict_inode */ F2FS_I(inode)->extent_tree = et; @@ -102,36 +275,6 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode) return et; } -static struct extent_node *__lookup_extent_tree(struct f2fs_sb_info *sbi, - struct extent_tree *et, unsigned int fofs) -{ - struct rb_node *node = et->root.rb_node; - struct extent_node *en = et->cached_en; - - if (en) { - struct extent_info *cei = &en->ei; - - if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) { - stat_inc_cached_node_hit(sbi); - return en; - } - } - - while (node) { - en = rb_entry(node, struct extent_node, rb_node); - - if (fofs < en->ei.fofs) { - node = node->rb_left; - } else if (fofs >= en->ei.fofs + en->ei.len) { - node = node->rb_right; - } else { - stat_inc_rbtree_node_hit(sbi); - return en; - } - } - return NULL; -} - static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_info *ei) { @@ -172,12 +315,12 @@ static void __drop_largest_extent(struct inode *inode, if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs) { largest->len = 0; - f2fs_mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode, true); } } /* return true, if inode page is changed */ -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) +static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et; @@ -215,6 +358,16 @@ out: return false; } +bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) +{ + bool ret = __f2fs_init_extent_tree(inode, i_ext); + + if (!F2FS_I(inode)->extent_tree) + set_inode_flag(inode, FI_NO_EXTENT); + + return ret; +} + static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, struct extent_info *ei) { @@ -237,17 +390,24 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, goto out; } - en = __lookup_extent_tree(sbi, et, pgofs); - if (en) { - *ei = en->ei; - spin_lock(&sbi->extent_lock); - if (!list_empty(&en->list)) { - list_move_tail(&en->list, &sbi->extent_list); - et->cached_en = en; - } - spin_unlock(&sbi->extent_lock); - ret = true; + en = (struct extent_node *)__lookup_rb_tree(&et->root, + (struct rb_entry *)et->cached_en, pgofs); + if (!en) + goto out; + + if (en == et->cached_en) + stat_inc_cached_node_hit(sbi); + else + stat_inc_rbtree_node_hit(sbi); + + *ei = en->ei; + spin_lock(&sbi->extent_lock); + if (!list_empty(&en->list)) { + list_move_tail(&en->list, &sbi->extent_list); + et->cached_en = en; } + spin_unlock(&sbi->extent_lock); + ret = true; out: stat_inc_total_hit(sbi); read_unlock(&et->lock); @@ -256,87 +416,6 @@ out: return ret; } - -/* - * lookup extent at @fofs, if hit, return the extent - * if not, return NULL and - * @prev_ex: extent before fofs - * @next_ex: extent after fofs - * @insert_p: insert point for new extent at fofs - * in order to simpfy the insertion after. - * tree must stay unchanged between lookup and insertion. - */ -static struct extent_node *__lookup_extent_tree_ret(struct extent_tree *et, - unsigned int fofs, - struct extent_node **prev_ex, - struct extent_node **next_ex, - struct rb_node ***insert_p, - struct rb_node **insert_parent) -{ - struct rb_node **pnode = &et->root.rb_node; - struct rb_node *parent = NULL, *tmp_node; - struct extent_node *en = et->cached_en; - - *insert_p = NULL; - *insert_parent = NULL; - *prev_ex = NULL; - *next_ex = NULL; - - if (RB_EMPTY_ROOT(&et->root)) - return NULL; - - if (en) { - struct extent_info *cei = &en->ei; - - if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) - goto lookup_neighbors; - } - - while (*pnode) { - parent = *pnode; - en = rb_entry(*pnode, struct extent_node, rb_node); - - if (fofs < en->ei.fofs) - pnode = &(*pnode)->rb_left; - else if (fofs >= en->ei.fofs + en->ei.len) - pnode = &(*pnode)->rb_right; - else - goto lookup_neighbors; - } - - *insert_p = pnode; - *insert_parent = parent; - - en = rb_entry(parent, struct extent_node, rb_node); - tmp_node = parent; - if (parent && fofs > en->ei.fofs) - tmp_node = rb_next(parent); - *next_ex = tmp_node ? - rb_entry(tmp_node, struct extent_node, rb_node) : NULL; - - tmp_node = parent; - if (parent && fofs < en->ei.fofs) - tmp_node = rb_prev(parent); - *prev_ex = tmp_node ? - rb_entry(tmp_node, struct extent_node, rb_node) : NULL; - return NULL; - -lookup_neighbors: - if (fofs == en->ei.fofs) { - /* lookup prev node for merging backward later */ - tmp_node = rb_prev(&en->rb_node); - *prev_ex = tmp_node ? - rb_entry(tmp_node, struct extent_node, rb_node) : NULL; - } - if (fofs == en->ei.fofs + en->ei.len - 1) { - /* lookup next node for merging frontward later */ - tmp_node = rb_next(&en->rb_node); - *next_ex = tmp_node ? - rb_entry(tmp_node, struct extent_node, rb_node) : NULL; - } - return en; -} - static struct extent_node *__try_merge_extent_node(struct inode *inode, struct extent_tree *et, struct extent_info *ei, struct extent_node *prev_ex, @@ -391,17 +470,7 @@ static struct extent_node *__insert_extent_tree(struct inode *inode, goto do_insert; } - while (*p) { - parent = *p; - en = rb_entry(parent, struct extent_node, rb_node); - - if (ei->fofs < en->ei.fofs) - p = &(*p)->rb_left; - else if (ei->fofs >= en->ei.fofs + en->ei.len) - p = &(*p)->rb_right; - else - f2fs_bug_on(sbi, 1); - } + p = __lookup_rb_tree_for_insert(sbi, &et->root, &parent, ei->fofs); do_insert: en = __attach_extent_node(sbi, et, ei, parent, p); if (!en) @@ -417,7 +486,7 @@ do_insert: return en; } -static unsigned int f2fs_update_extent_tree_range(struct inode *inode, +static void f2fs_update_extent_tree_range(struct inode *inode, pgoff_t fofs, block_t blkaddr, unsigned int len) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -430,7 +499,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode, unsigned int pos = (unsigned int)fofs; if (!et) - return false; + return; trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len); @@ -438,7 +507,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode, if (is_inode_flag_set(inode, FI_NO_EXTENT)) { write_unlock(&et->lock); - return false; + return; } prev = et->largest; @@ -451,8 +520,11 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode, __drop_largest_extent(inode, fofs, len); /* 1. lookup first extent node in range [fofs, fofs + len - 1] */ - en = __lookup_extent_tree_ret(et, fofs, &prev_en, &next_en, - &insert_p, &insert_parent); + en = (struct extent_node *)__lookup_rb_tree_ret(&et->root, + (struct rb_entry *)et->cached_en, fofs, + (struct rb_entry **)&prev_en, + (struct rb_entry **)&next_en, + &insert_p, &insert_parent, false); if (!en) en = next_en; @@ -493,9 +565,8 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode, if (!next_en) { struct rb_node *node = rb_next(&en->rb_node); - next_en = node ? - rb_entry(node, struct extent_node, rb_node) - : NULL; + next_en = rb_entry_safe(node, struct extent_node, + rb_node); } if (parts) @@ -536,8 +607,6 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode, __free_extent_tree(sbi, et); write_unlock(&et->lock); - - return !__is_extent_same(&prev, &et->largest); } unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) @@ -553,7 +622,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) if (!atomic_read(&sbi->total_zombie_tree)) goto free_node; - if (!down_write_trylock(&sbi->extent_tree_lock)) + if (!mutex_trylock(&sbi->extent_tree_lock)) goto out; /* 1. remove unreferenced extent tree */ @@ -575,11 +644,11 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) goto unlock_out; cond_resched(); } - up_write(&sbi->extent_tree_lock); + mutex_unlock(&sbi->extent_tree_lock); free_node: /* 2. remove LRU extent entries */ - if (!down_write_trylock(&sbi->extent_tree_lock)) + if (!mutex_trylock(&sbi->extent_tree_lock)) goto out; remained = nr_shrink - (node_cnt + tree_cnt); @@ -609,7 +678,7 @@ free_node: spin_unlock(&sbi->extent_lock); unlock_out: - up_write(&sbi->extent_tree_lock); + mutex_unlock(&sbi->extent_tree_lock); out: trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt); @@ -656,10 +725,10 @@ void f2fs_destroy_extent_tree(struct inode *inode) if (inode->i_nlink && !is_bad_inode(inode) && atomic_read(&et->node_cnt)) { - down_write(&sbi->extent_tree_lock); + mutex_lock(&sbi->extent_tree_lock); list_add_tail(&et->list, &sbi->zombie_list); atomic_inc(&sbi->total_zombie_tree); - up_write(&sbi->extent_tree_lock); + mutex_unlock(&sbi->extent_tree_lock); return; } @@ -667,12 +736,12 @@ void f2fs_destroy_extent_tree(struct inode *inode) node_cnt = f2fs_destroy_extent_node(inode); /* delete extent tree entry in radix tree */ - down_write(&sbi->extent_tree_lock); + mutex_lock(&sbi->extent_tree_lock); f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); radix_tree_delete(&sbi->extent_tree_root, inode->i_ino); kmem_cache_free(extent_tree_slab, et); atomic_dec(&sbi->total_ext_tree); - up_write(&sbi->extent_tree_lock); + mutex_unlock(&sbi->extent_tree_lock); F2FS_I(inode)->extent_tree = NULL; @@ -719,7 +788,7 @@ void f2fs_update_extent_cache_range(struct dnode_of_data *dn, void init_extent_cache_info(struct f2fs_sb_info *sbi) { INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO); - init_rwsem(&sbi->extent_tree_lock); + mutex_init(&sbi->extent_tree_lock); INIT_LIST_HEAD(&sbi->extent_list); spin_lock_init(&sbi->extent_lock); atomic_set(&sbi->total_ext_tree, 0); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 88e111ab068b..461e22d0a452 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -19,10 +19,16 @@ #include #include #include +#include #include #include #include -#include +#include +#ifdef CONFIG_F2FS_FS_ENCRYPTION +#include +#else +#include +#endif #include #ifdef CONFIG_F2FS_CHECK_FS @@ -46,6 +52,7 @@ enum { FAULT_BLOCK, FAULT_DIR_DEPTH, FAULT_EVICT_INODE, + FAULT_TRUNCATE, FAULT_IO, FAULT_CHECKPOINT, FAULT_MAX, @@ -58,7 +65,7 @@ struct f2fs_fault_info { }; extern char *fault_name[FAULT_MAX]; -#define IS_FAULT_SET(fi, type) (fi->inject_type & (1 << (type))) +#define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type))) #endif /* @@ -83,10 +90,14 @@ extern char *fault_name[FAULT_MAX]; #define F2FS_MOUNT_FAULT_INJECTION 0x00010000 #define F2FS_MOUNT_ADAPTIVE 0x00020000 #define F2FS_MOUNT_LFS 0x00040000 +#define F2FS_MOUNT_USRQUOTA 0x00080000 +#define F2FS_MOUNT_GRPQUOTA 0x00100000 +#define F2FS_MOUNT_PRJQUOTA 0x00200000 +#define F2FS_MOUNT_QUOTA 0x00400000 -#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) -#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) -#define test_opt(sbi, option) (sbi->mount_opt.opt & F2FS_MOUNT_##option) +#define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option) +#define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option) +#define test_opt(sbi, option) ((sbi)->mount_opt.opt & F2FS_MOUNT_##option) #define ver_after(a, b) (typecheck(unsigned long long, a) && \ typecheck(unsigned long long, b) && \ @@ -102,15 +113,19 @@ struct f2fs_mount_info { unsigned int opt; }; -#define F2FS_FEATURE_ENCRYPT 0x0001 -#define F2FS_FEATURE_HMSMR 0x0002 +#define F2FS_FEATURE_ENCRYPT 0x0001 +#define F2FS_FEATURE_BLKZONED 0x0002 +#define F2FS_FEATURE_ATOMIC_WRITE 0x0004 +#define F2FS_FEATURE_EXTRA_ATTR 0x0008 +#define F2FS_FEATURE_PRJQUOTA 0x0010 +#define F2FS_FEATURE_INODE_CHKSUM 0x0020 #define F2FS_HAS_FEATURE(sb, mask) \ ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) #define F2FS_SET_FEATURE(sb, mask) \ - F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask) + (F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask)) #define F2FS_CLEAR_FEATURE(sb, mask) \ - F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask) + (F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask)) /* * For checkpoint manager @@ -120,19 +135,22 @@ enum { SIT_BITMAP }; -enum { - CP_UMOUNT, - CP_FASTBOOT, - CP_SYNC, - CP_RECOVERY, - CP_DISCARD, -}; +#define CP_UMOUNT 0x00000001 +#define CP_FASTBOOT 0x00000002 +#define CP_SYNC 0x00000004 +#define CP_RECOVERY 0x00000008 +#define CP_DISCARD 0x00000010 +#define CP_TRIMMED 0x00000020 -#define DEF_BATCHED_TRIM_SECTIONS 2 +#define DEF_BATCHED_TRIM_SECTIONS 2048 #define BATCHED_TRIM_SEGMENTS(sbi) \ - (SM_I(sbi)->trim_sections * (sbi)->segs_per_sec) + (GET_SEG_FROM_SEC(sbi, SM_I(sbi)->trim_sections)) #define BATCHED_TRIM_BLOCKS(sbi) \ (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg) +#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi) +#define DISCARD_ISSUE_RATE 8 +#define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */ +#define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */ #define DEF_CP_INTERVAL 60 /* 60 secs */ #define DEF_IDLE_INTERVAL 5 /* 5 secs */ @@ -174,18 +192,73 @@ struct inode_entry { struct inode *inode; /* vfs inode pointer */ }; -/* for the list of blockaddresses to be discarded */ +/* for the bitmap indicate blocks to be discarded */ struct discard_entry { struct list_head list; /* list head */ - block_t blkaddr; /* block address to be discarded */ - int len; /* # of consecutive blocks of the discard */ + block_t start_blkaddr; /* start blockaddr of current segment */ + unsigned char discard_map[SIT_VBLOCK_MAP_SIZE]; /* segment discard bitmap */ }; -struct bio_entry { - struct list_head list; - struct bio *bio; - struct completion event; - int error; +/* default discard granularity of inner discard thread, unit: block count */ +#define DEFAULT_DISCARD_GRANULARITY 16 + +/* max discard pend list number */ +#define MAX_PLIST_NUM 512 +#define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? \ + (MAX_PLIST_NUM - 1) : (blk_num - 1)) + +#define P_ACTIVE 0x01 +#define P_TRIM 0x02 +#define plist_issue(tag) (((tag) & P_ACTIVE) || ((tag) & P_TRIM)) + +enum { + D_PREP, + D_SUBMIT, + D_DONE, +}; + +struct discard_info { + block_t lstart; /* logical start address */ + block_t len; /* length */ + block_t start; /* actual start address in dev */ +}; + +struct discard_cmd { + struct rb_node rb_node; /* rb node located in rb-tree */ + union { + struct { + block_t lstart; /* logical start address */ + block_t len; /* length */ + block_t start; /* actual start address in dev */ + }; + struct discard_info di; /* discard info */ + + }; + struct list_head list; /* command list */ + struct completion wait; /* compleation */ + struct block_device *bdev; /* bdev */ + unsigned short ref; /* reference count */ + unsigned char state; /* state */ + int error; /* bio error */ +}; + +struct discard_cmd_control { + struct task_struct *f2fs_issue_discard; /* discard thread */ + struct list_head entry_list; /* 4KB discard entry list */ + struct list_head pend_list[MAX_PLIST_NUM];/* store pending entries */ + unsigned char pend_list_tag[MAX_PLIST_NUM];/* tag for pending entries */ + struct list_head wait_list; /* store on-flushing entries */ + wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */ + unsigned int discard_wake; /* to wake up discard thread */ + struct mutex cmd_lock; + unsigned int nr_discards; /* # of discards in the list */ + unsigned int max_discards; /* max. discards to be issued */ + unsigned int discard_granularity; /* discard granularity */ + unsigned int undiscard_blks; /* # of undiscard blocks */ + atomic_t issued_discard; /* # of issued discard */ + atomic_t issing_discard; /* # of issing discard */ + atomic_t discard_cmd_cnt; /* # of cached cmd count */ + struct rb_root root; /* root of discard rb-tree */ }; /* for the list of fsync inodes, used only during recovery */ @@ -196,13 +269,13 @@ struct fsync_inode_entry { block_t last_dentry; /* block address locating the last dentry */ }; -#define nats_in_cursum(jnl) (le16_to_cpu(jnl->n_nats)) -#define sits_in_cursum(jnl) (le16_to_cpu(jnl->n_sits)) +#define nats_in_cursum(jnl) (le16_to_cpu((jnl)->n_nats)) +#define sits_in_cursum(jnl) (le16_to_cpu((jnl)->n_sits)) -#define nat_in_journal(jnl, i) (jnl->nat_j.entries[i].ne) -#define nid_in_journal(jnl, i) (jnl->nat_j.entries[i].nid) -#define sit_in_journal(jnl, i) (jnl->sit_j.entries[i].se) -#define segno_in_journal(jnl, i) (jnl->sit_j.entries[i].segno) +#define nat_in_journal(jnl, i) ((jnl)->nat_j.entries[i].ne) +#define nid_in_journal(jnl, i) ((jnl)->nat_j.entries[i].nid) +#define sit_in_journal(jnl, i) ((jnl)->sit_j.entries[i].se) +#define segno_in_journal(jnl, i) ((jnl)->sit_j.entries[i].segno) #define MAX_NAT_JENTRIES(jnl) (NAT_JOURNAL_ENTRIES - nats_in_cursum(jnl)) #define MAX_SIT_JENTRIES(jnl) (SIT_JOURNAL_ENTRIES - sits_in_cursum(jnl)) @@ -210,6 +283,7 @@ struct fsync_inode_entry { static inline int update_nats_in_cursum(struct f2fs_journal *journal, int i) { int before = nats_in_cursum(journal); + journal->n_nats = cpu_to_le16(before + i); return before; } @@ -217,6 +291,7 @@ static inline int update_nats_in_cursum(struct f2fs_journal *journal, int i) static inline int update_sits_in_cursum(struct f2fs_journal *journal, int i) { int before = sits_in_cursum(journal); + journal->n_sits = cpu_to_le16(before + i); return before; } @@ -242,11 +317,17 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) #define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) #define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) -#define F2FS_IOC_GARBAGE_COLLECT _IO(F2FS_IOCTL_MAGIC, 6) +#define F2FS_IOC_GARBAGE_COLLECT _IOW(F2FS_IOCTL_MAGIC, 6, __u32) #define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7) -#define F2FS_IOC_DEFRAGMENT _IO(F2FS_IOCTL_MAGIC, 8) +#define F2FS_IOC_DEFRAGMENT _IOWR(F2FS_IOCTL_MAGIC, 8, \ + struct f2fs_defragment) #define F2FS_IOC_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \ struct f2fs_move_range) +#define F2FS_IOC_FLUSH_DEVICE _IOW(F2FS_IOCTL_MAGIC, 10, \ + struct f2fs_flush_device) +#define F2FS_IOC_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11, \ + struct f2fs_gc_range) +#define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, __u32) #define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY #define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY @@ -271,6 +352,12 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC32_GETVERSION FS_IOC32_GETVERSION #endif +struct f2fs_gc_range { + u32 sync; + u64 start; + u64 len; +}; + struct f2fs_defragment { u64 start; u64 len; @@ -283,36 +370,68 @@ struct f2fs_move_range { u64 len; /* size to move */ }; +struct f2fs_flush_device { + u32 dev_num; /* device number to flush */ + u32 segments; /* # of segments to flush */ +}; + +/* for inline stuff */ +#define DEF_INLINE_RESERVED_SIZE 1 +static inline int get_extra_isize(struct inode *inode); +#define MAX_INLINE_DATA(inode) (sizeof(__le32) * \ + (CUR_ADDRS_PER_INODE(inode) - \ + DEF_INLINE_RESERVED_SIZE - \ + F2FS_INLINE_XATTR_ADDRS)) + +/* for inline dir */ +#define NR_INLINE_DENTRY(inode) (MAX_INLINE_DATA(inode) * BITS_PER_BYTE / \ + ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ + BITS_PER_BYTE + 1)) +#define INLINE_DENTRY_BITMAP_SIZE(inode) ((NR_INLINE_DENTRY(inode) + \ + BITS_PER_BYTE - 1) / BITS_PER_BYTE) +#define INLINE_RESERVED_SIZE(inode) (MAX_INLINE_DATA(inode) - \ + ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ + NR_INLINE_DENTRY(inode) + \ + INLINE_DENTRY_BITMAP_SIZE(inode))) + /* * For INODE and NODE manager */ /* for directory operations */ struct f2fs_dentry_ptr { struct inode *inode; - const void *bitmap; + void *bitmap; struct f2fs_dir_entry *dentry; __u8 (*filename)[F2FS_SLOT_LEN]; int max; + int nr_bitmap; }; -static inline void make_dentry_ptr(struct inode *inode, - struct f2fs_dentry_ptr *d, void *src, int type) +static inline void make_dentry_ptr_block(struct inode *inode, + struct f2fs_dentry_ptr *d, struct f2fs_dentry_block *t) { d->inode = inode; + d->max = NR_DENTRY_IN_BLOCK; + d->nr_bitmap = SIZE_OF_DENTRY_BITMAP; + d->bitmap = &t->dentry_bitmap; + d->dentry = t->dentry; + d->filename = t->filename; +} - if (type == 1) { - struct f2fs_dentry_block *t = (struct f2fs_dentry_block *)src; - d->max = NR_DENTRY_IN_BLOCK; - d->bitmap = &t->dentry_bitmap; - d->dentry = t->dentry; - d->filename = t->filename; - } else { - struct f2fs_inline_dentry *t = (struct f2fs_inline_dentry *)src; - d->max = NR_INLINE_DENTRY; - d->bitmap = &t->dentry_bitmap; - d->dentry = t->dentry; - d->filename = t->filename; - } +static inline void make_dentry_ptr_inline(struct inode *inode, + struct f2fs_dentry_ptr *d, void *t) +{ + int entry_cnt = NR_INLINE_DENTRY(inode); + int bitmap_size = INLINE_DENTRY_BITMAP_SIZE(inode); + int reserved_size = INLINE_RESERVED_SIZE(inode); + + d->inode = inode; + d->max = entry_cnt; + d->nr_bitmap = bitmap_size; + d->bitmap = t; + d->dentry = t + bitmap_size + reserved_size; + d->filename = t + bitmap_size + reserved_size + + SIZE_OF_DIR_ENTRY * entry_cnt; } /* @@ -344,16 +463,30 @@ enum { /* number of extent info in extent cache we try to shrink */ #define EXTENT_CACHE_SHRINK_NUMBER 128 +struct rb_entry { + struct rb_node rb_node; /* rb node located in rb-tree */ + unsigned int ofs; /* start offset of the entry */ + unsigned int len; /* length of the entry */ +}; + struct extent_info { unsigned int fofs; /* start offset in a file */ - u32 blk; /* start block address of the extent */ unsigned int len; /* length of the extent */ + u32 blk; /* start block address of the extent */ }; struct extent_node { - struct rb_node rb_node; /* rb node located in rb-tree */ + struct rb_node rb_node; + union { + struct { + unsigned int fofs; + unsigned int len; + u32 blk; + }; + struct extent_info ei; /* extent info */ + + }; struct list_head list; /* node in global extent list of sbi */ - struct extent_info ei; /* extent info */ struct extent_tree *et; /* extent tree pointer */ }; @@ -387,12 +520,13 @@ struct f2fs_map_blocks { }; /* for flag in get_data_block */ -#define F2FS_GET_BLOCK_READ 0 -#define F2FS_GET_BLOCK_DIO 1 -#define F2FS_GET_BLOCK_FIEMAP 2 -#define F2FS_GET_BLOCK_BMAP 3 -#define F2FS_GET_BLOCK_PRE_DIO 4 -#define F2FS_GET_BLOCK_PRE_AIO 5 +enum { + F2FS_GET_BLOCK_DEFAULT, + F2FS_GET_BLOCK_FIEMAP, + F2FS_GET_BLOCK_BMAP, + F2FS_GET_BLOCK_PRE_DIO, + F2FS_GET_BLOCK_PRE_AIO, +}; /* * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. @@ -401,6 +535,7 @@ struct f2fs_map_blocks { #define FADVISE_LOST_PINO_BIT 0x02 #define FADVISE_ENCRYPT_BIT 0x04 #define FADVISE_ENC_NAME_BIT 0x08 +#define FADVISE_KEEP_SIZE_BIT 0x10 #define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) #define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) @@ -413,6 +548,8 @@ struct f2fs_map_blocks { #define file_clear_encrypt(inode) clear_file(inode, FADVISE_ENCRYPT_BIT) #define file_enc_name(inode) is_file(inode, FADVISE_ENC_NAME_BIT) #define file_set_enc_name(inode) set_file(inode, FADVISE_ENC_NAME_BIT) +#define file_keep_isize(inode) is_file(inode, FADVISE_KEEP_SIZE_BIT) +#define file_set_keep_isize(inode) set_file(inode, FADVISE_KEEP_SIZE_BIT) #define DEF_DIR_LEVEL 0 @@ -432,16 +569,28 @@ struct f2fs_inode_info { f2fs_hash_t chash; /* hash value of given file name */ unsigned int clevel; /* maximum level of given file name */ struct task_struct *task; /* lookup and create consistency */ + struct task_struct *cp_task; /* separate cp/wb IO stats*/ nid_t i_xattr_nid; /* node id that contains xattrs */ - unsigned long long xattr_ver; /* cp version of xattr modification */ loff_t last_disk_size; /* lastly written file size */ +#ifdef CONFIG_QUOTA + struct dquot *i_dquot[MAXQUOTAS]; + + /* quota space reservation, managed internally by quota code */ + qsize_t i_reserved_quota; +#endif struct list_head dirty_list; /* dirty list for dirs and files */ struct list_head gdirty_list; /* linked in global dirty list */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */ + struct task_struct *inmem_task; /* store inmemory task */ struct mutex inmem_lock; /* lock for inmemory pages */ struct extent_tree *extent_tree; /* cached extent_tree entry */ struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */ + struct rw_semaphore i_mmap_sem; + struct rw_semaphore i_xattr_sem; /* avoid racing between reading and changing EAs */ + + int i_extra_isize; /* size of extra space located in i_addr */ + kprojid_t i_projid; /* id for project quota */ }; static inline void get_extent_info(struct extent_info *ext, @@ -468,11 +617,22 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs, ei->len = len; } -static inline bool __is_extent_same(struct extent_info *ei1, - struct extent_info *ei2) +static inline bool __is_discard_mergeable(struct discard_info *back, + struct discard_info *front) { - return (ei1->fofs == ei2->fofs && ei1->blk == ei2->blk && - ei1->len == ei2->len); + return back->lstart + back->len == front->lstart; +} + +static inline bool __is_discard_back_mergeable(struct discard_info *cur, + struct discard_info *back) +{ + return __is_discard_mergeable(back, cur); +} + +static inline bool __is_discard_front_mergeable(struct discard_info *cur, + struct discard_info *front) +{ + return __is_discard_mergeable(cur, front); } static inline bool __is_extent_mergeable(struct extent_info *back, @@ -494,20 +654,26 @@ static inline bool __is_front_mergeable(struct extent_info *cur, return __is_extent_mergeable(cur, front); } -extern void f2fs_mark_inode_dirty_sync(struct inode *); +extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync); static inline void __try_update_largest_extent(struct inode *inode, struct extent_tree *et, struct extent_node *en) { if (en->ei.len > et->largest.len) { et->largest = en->ei; - f2fs_mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode, true); } } +enum nid_list { + FREE_NID_LIST, + ALLOC_NID_LIST, + MAX_NID_LIST, +}; + struct f2fs_nm_info { block_t nat_blkaddr; /* base disk address of NAT */ nid_t max_nid; /* maximum possible node ids */ - nid_t available_nids; /* maximum available node ids */ + nid_t available_nids; /* # of available node ids */ nid_t next_scan_nid; /* the next nid to be scanned */ unsigned int ram_thresh; /* control the memory footprint */ unsigned int ra_nid_pages; /* # of nid pages to be readaheaded */ @@ -520,16 +686,28 @@ struct f2fs_nm_info { struct list_head nat_entries; /* cached nat entry list (clean) */ unsigned int nat_cnt; /* the # of cached nat entries */ unsigned int dirty_nat_cnt; /* total num of nat entries in set */ + unsigned int nat_blocks; /* # of nat blocks */ /* free node ids management */ struct radix_tree_root free_nid_root;/* root of the free_nid cache */ - struct list_head free_nid_list; /* a list for free nids */ - spinlock_t free_nid_list_lock; /* protect free nid list */ - unsigned int fcnt; /* the number of free node id */ + struct list_head nid_list[MAX_NID_LIST];/* lists for free nids */ + unsigned int nid_cnt[MAX_NID_LIST]; /* the number of free node id */ + spinlock_t nid_list_lock; /* protect nid lists ops */ struct mutex build_lock; /* lock for build free nids */ + unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE]; + unsigned char *nat_block_bitmap; + unsigned short *free_nid_count; /* free nid count of NAT block */ /* for checkpoint */ char *nat_bitmap; /* NAT bitmap pointer */ + + unsigned int nat_bits_blocks; /* # of nat bits blocks */ + unsigned char *nat_bits; /* NAT bits blocks */ + unsigned char *full_nat_bits; /* full NAT pages */ + unsigned char *empty_nat_bits; /* empty NAT pages */ +#ifdef CONFIG_F2FS_CHECK_FS + char *nat_bitmap_mir; /* NAT bitmap mirror */ +#endif int bitmap_size; /* bitmap size */ }; @@ -586,7 +764,6 @@ enum { CURSEG_WARM_NODE, /* direct node blocks of normal files */ CURSEG_COLD_NODE, /* indirect node blocks */ NO_CHECK_TYPE, - CURSEG_DIRECT_IO, /* to use for the direct IO path */ }; struct flush_cmd { @@ -598,7 +775,8 @@ struct flush_cmd { struct flush_cmd_control { struct task_struct *f2fs_issue_flush; /* flush thread */ wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */ - atomic_t submit_flush; /* # of issued flushes */ + atomic_t issued_flush; /* # of issued flushes */ + atomic_t issing_flush; /* # of issing flushes */ struct llist_head issue_list; /* list for command issue */ struct llist_node *dispatch_list; /* list for command dispatch */ }; @@ -621,12 +799,6 @@ struct f2fs_sm_info { /* a threshold to reclaim prefree segments */ unsigned int rec_prefree_segments; - /* for small discard management */ - struct list_head discard_list; /* 4KB discard list */ - struct list_head wait_list; /* linked with issued discard bio */ - int nr_discards; /* # of discards in the list */ - int max_discards; /* max. discards to be issued */ - /* for batched trimming */ unsigned int trim_sections; /* # of sections to trim */ @@ -635,10 +807,13 @@ struct f2fs_sm_info { unsigned int ipu_policy; /* in-place-update policy */ unsigned int min_ipu_util; /* in-place-update threshold */ unsigned int min_fsync_blocks; /* threshold for fsync */ + unsigned int min_hot_blocks; /* threshold for hot block allocation */ /* for flush command control */ - struct flush_cmd_control *cmd_control_info; + struct flush_cmd_control *fcc_info; + /* for discard command control */ + struct discard_cmd_control *dcc_info; }; /* @@ -650,6 +825,7 @@ struct f2fs_sm_info { * f2fs monitors the number of several block types such as on-writeback, * dirty dentry blocks, dirty node blocks, and dirty meta blocks. */ +#define WB_DATA_TYPE(p) (__is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA) enum count_type { F2FS_DIRTY_DENTS, F2FS_DIRTY_DATA, @@ -657,6 +833,8 @@ enum count_type { F2FS_DIRTY_META, F2FS_INMEM_PAGES, F2FS_DIRTY_IMETA, + F2FS_WB_CP_DATA, + F2FS_WB_DATA, NR_COUNT_TYPE, }; @@ -680,29 +858,82 @@ enum page_type { META_FLUSH, INMEM, /* the below types are used by tracepoints only. */ INMEM_DROP, + INMEM_INVALIDATE, INMEM_REVOKE, IPU, OPU, }; +enum temp_type { + HOT = 0, /* must be zero for meta bio */ + WARM, + COLD, + NR_TEMP_TYPE, +}; + +enum need_lock_type { + LOCK_REQ = 0, + LOCK_DONE, + LOCK_RETRY, +}; + +enum iostat_type { + APP_DIRECT_IO, /* app direct IOs */ + APP_BUFFERED_IO, /* app buffered IOs */ + APP_WRITE_IO, /* app write IOs */ + APP_MAPPED_IO, /* app mapped IOs */ + FS_DATA_IO, /* data IOs from kworker/fsync/reclaimer */ + FS_NODE_IO, /* node IOs from kworker/fsync/reclaimer */ + FS_META_IO, /* meta IOs from kworker/reclaimer */ + FS_GC_DATA_IO, /* data IOs from forground gc */ + FS_GC_NODE_IO, /* node IOs from forground gc */ + FS_CP_DATA_IO, /* data IOs from checkpoint */ + FS_CP_NODE_IO, /* node IOs from checkpoint */ + FS_CP_META_IO, /* meta IOs from checkpoint */ + FS_DISCARD, /* discard */ + NR_IO_TYPE, +}; + struct f2fs_io_info { struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */ enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ + enum temp_type temp; /* contains HOT/WARM/COLD */ int op; /* contains REQ_OP_ */ - int op_flags; /* rq_flag_bits */ + int op_flags; /* req_flag_bits */ block_t new_blkaddr; /* new block address to be written */ block_t old_blkaddr; /* old block address before Cow */ struct page *page; /* page to be written */ struct page *encrypted_page; /* encrypted page */ + struct list_head list; /* serialize IOs */ + bool submitted; /* indicate IO submission */ + int need_lock; /* indicate we need to lock cp_rwsem */ + bool in_list; /* indicate fio is in io_list */ + enum iostat_type io_type; /* io type */ }; -#define is_read_io(rw) (rw == READ) +#define is_read_io(rw) ((rw) == READ) struct f2fs_bio_info { struct f2fs_sb_info *sbi; /* f2fs superblock */ struct bio *bio; /* bios to merge */ sector_t last_block_in_bio; /* last block number */ struct f2fs_io_info fio; /* store buffered io info. */ struct rw_semaphore io_rwsem; /* blocking op for bio */ + spinlock_t io_lock; /* serialize DATA/NODE IOs */ + struct list_head io_list; /* track fios */ +}; + +#define FDEV(i) (sbi->devs[i]) +#define RDEV(i) (raw_super->devs[i]) +struct f2fs_dev_info { + struct block_device *bdev; + char path[MAX_PATH_LEN]; + unsigned int total_segments; + block_t start_blk; + block_t end_blk; +#ifdef CONFIG_BLK_DEV_ZONED + unsigned int nr_blkz; /* Total number of zones */ + u8 *blkz_type; /* Array of zones type */ +#endif }; enum inode_type { @@ -736,10 +967,6 @@ enum { MAX_TIME, }; -#ifdef CONFIG_F2FS_FS_ENCRYPTION -#define F2FS_KEY_DESC_PREFIX "f2fs:" -#define F2FS_KEY_DESC_PREFIX_SIZE 5 -#endif struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ @@ -747,10 +974,11 @@ struct f2fs_sb_info { int valid_super_block; /* valid super block no */ unsigned long s_flag; /* flags for sbi */ -#ifdef CONFIG_F2FS_FS_ENCRYPTION - u8 key_prefix[F2FS_KEY_DESC_PREFIX_SIZE]; - u8 key_prefix_size; +#ifdef CONFIG_BLK_DEV_ZONED + unsigned int blocks_per_blkz; /* F2FS blocks per zone */ + unsigned int log_blocks_per_blkz; /* log2 F2FS blocks per zone */ #endif + /* for node-related operations */ struct f2fs_nm_info *nm_info; /* node manager */ struct inode *node_inode; /* cache node blocks */ @@ -759,9 +987,11 @@ struct f2fs_sb_info { struct f2fs_sm_info *sm_info; /* segment manager */ /* for bio operations */ - struct f2fs_bio_info read_io; /* for read bios */ - struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ - struct mutex wio_mutex[NODE + 1]; /* bio ordering for NODE/DATA */ + struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */ + struct mutex wio_mutex[NR_PAGE_TYPE - 1][NR_TEMP_TYPE]; + /* bio ordering for NODE/DATA */ + int write_io_size_bits; /* Write IO size bits */ + mempool_t *write_io_dummy; /* Dummy pages */ /* for checkpoint */ struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ @@ -771,6 +1001,7 @@ struct f2fs_sb_info { struct mutex cp_mutex; /* checkpoint procedure lock */ struct rw_semaphore cp_rwsem; /* blocking FS operations */ struct rw_semaphore node_write; /* locking node writes */ + struct rw_semaphore node_change; /* locking node change */ wait_queue_head_t cp_wait; unsigned long last_time[MAX_TIME]; /* to store time in jiffies */ long interval_time[MAX_TIME]; /* to store thresholds */ @@ -786,7 +1017,7 @@ struct f2fs_sb_info { /* for extent tree cache */ struct radix_tree_root extent_tree_root;/* cache extent cache entries */ - struct rw_semaphore extent_tree_lock; /* locking extent radix tree */ + struct mutex extent_tree_lock; /* locking extent radix tree */ struct list_head extent_list; /* lru list for shrinker */ spinlock_t extent_lock; /* locking extent lru list */ atomic_t total_ext_tree; /* extent tree count */ @@ -816,14 +1047,18 @@ struct f2fs_sb_info { block_t total_valid_block_count; /* # of valid blocks */ block_t discard_blks; /* discard command candidats */ block_t last_valid_block_count; /* for recovery */ + block_t reserved_blocks; /* configurable reserved blocks */ + u32 s_next_generation; /* for NFS support */ - atomic_t nr_wb_bios; /* # of writeback bios */ /* # of pages, see count_type */ atomic_t nr_pages[NR_COUNT_TYPE]; /* # of allocated blocks */ struct percpu_counter alloc_valid_block_count; + /* writeback control */ + atomic_t wb_sync_req; /* count # of WB_SYNC threads */ + /* valid inode count */ struct percpu_counter total_valid_inode_count; @@ -856,18 +1091,28 @@ struct f2fs_sb_info { atomic_t inline_xattr; /* # of inline_xattr inodes */ atomic_t inline_inode; /* # of inline_data inodes */ atomic_t inline_dir; /* # of inline_dentry inodes */ + atomic_t aw_cnt; /* # of atomic writes */ + atomic_t vw_cnt; /* # of volatile writes */ + atomic_t max_aw_cnt; /* max # of atomic writes */ + atomic_t max_vw_cnt; /* max # of volatile writes */ int bg_gc; /* background gc calls */ unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */ #endif - unsigned int last_victim[2]; /* last victim segment # */ spinlock_t stat_lock; /* lock for stat operations */ + /* For app/fs IO statistics */ + spinlock_t iostat_lock; + unsigned long long write_iostat[NR_IO_TYPE]; + bool iostat_enable; + /* For sysfs suppport */ struct kobject s_kobj; struct completion s_kobj_unregister; /* For shrinker support */ struct list_head s_list; + int s_ndevs; /* number of devices */ + struct f2fs_dev_info *devs; /* for device list */ struct mutex umount_mutex; unsigned int shrinker_run_no; @@ -878,13 +1123,26 @@ struct f2fs_sb_info { /* Reference to checksum algorithm driver via cryptoapi */ struct crypto_shash *s_chksum_driver; + /* Precomputed FS UUID checksum for seeding other checksums */ + __u32 s_chksum_seed; + /* For fault injection */ #ifdef CONFIG_F2FS_FAULT_INJECTION struct f2fs_fault_info fault_info; #endif + +#ifdef CONFIG_QUOTA + /* Names of quota files with journalled quota */ + char *s_qf_names[MAXQUOTAS]; + int s_jquota_fmt; /* Format of quota to use */ +#endif }; #ifdef CONFIG_F2FS_FAULT_INJECTION +#define f2fs_show_injection_info(type) \ + printk("%sF2FS-fs : inject %s in %s of %pF\n", \ + KERN_INFO, fault_name[type], \ + __func__, __builtin_return_address(0)) static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) { struct f2fs_fault_info *ffi = &sbi->fault_info; @@ -898,10 +1156,6 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) atomic_inc(&ffi->inject_ops); if (atomic_read(&ffi->inject_ops) >= ffi->inject_rate) { atomic_set(&ffi->inject_ops, 0); - printk("%sF2FS-fs : inject %s in %pF\n", - KERN_INFO, - fault_name[type], - __builtin_return_address(0)); return true; } return false; @@ -912,8 +1166,8 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) * and the return value is in kbytes. s is of struct f2fs_sb_info. */ #define BD_PART_WRITTEN(s) \ -(((u64)part_stat_read(s->sb->s_bdev->bd_part, sectors[1]) - \ - s->sectors_written_start) >> 1) +(((u64)part_stat_read((s)->sb->s_bdev->bd_part, sectors[1]) - \ + (s)->sectors_written_start) >> 1) static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type) { @@ -969,6 +1223,27 @@ static inline bool f2fs_crc_valid(struct f2fs_sb_info *sbi, __u32 blk_crc, return f2fs_crc32(sbi, buf, buf_size) == blk_crc; } +static inline u32 f2fs_chksum(struct f2fs_sb_info *sbi, u32 crc, + const void *address, unsigned int length) +{ + struct { + struct shash_desc shash; + char ctx[4]; + } desc; + int err; + + BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver) != sizeof(desc.ctx)); + + desc.shash.tfm = sbi->s_chksum_driver; + desc.shash.flags = 0; + *(u32 *)desc.ctx = crc; + + err = crypto_shash_update(&desc.shash, address, length); + BUG_ON(err); + + return *(u32 *)desc.ctx; +} + static inline struct f2fs_inode_info *F2FS_I(struct inode *inode) { return container_of(inode, struct f2fs_inode_info, vfs_inode); @@ -1069,6 +1344,12 @@ static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp) return le64_to_cpu(cp->checkpoint_ver); } +static inline __u64 cur_cp_crc(struct f2fs_checkpoint *cp) +{ + size_t crc_offset = le32_to_cpu(cp->checksum_offset); + return le32_to_cpu(*((__le32 *)((unsigned char *)cp + crc_offset))); +} + static inline bool __is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) { unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); @@ -1092,9 +1373,11 @@ static inline void __set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) static inline void set_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f) { - spin_lock(&sbi->cp_lock); + unsigned long flags; + + spin_lock_irqsave(&sbi->cp_lock, flags); __set_ckpt_flags(F2FS_CKPT(sbi), f); - spin_unlock(&sbi->cp_lock); + spin_unlock_irqrestore(&sbi->cp_lock, flags); } static inline void __clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) @@ -1108,16 +1391,34 @@ static inline void __clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f) { - spin_lock(&sbi->cp_lock); + unsigned long flags; + + spin_lock_irqsave(&sbi->cp_lock, flags); __clear_ckpt_flags(F2FS_CKPT(sbi), f); - spin_unlock(&sbi->cp_lock); + spin_unlock_irqrestore(&sbi->cp_lock, flags); } -static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi) +static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock) { - struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev); + unsigned long flags; - return blk_queue_discard(q); + set_sbi_flag(sbi, SBI_NEED_FSCK); + + if (lock) + spin_lock_irqsave(&sbi->cp_lock, flags); + __clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG); + kfree(NM_I(sbi)->nat_bits); + NM_I(sbi)->nat_bits = NULL; + if (lock) + spin_unlock_irqrestore(&sbi->cp_lock, flags); +} + +static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi, + struct cp_control *cpc) +{ + bool set = is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG); + + return (cpc) ? (cpc->reason & CP_UMOUNT) && set : set; } static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) @@ -1125,6 +1426,11 @@ static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) down_read(&sbi->cp_rwsem); } +static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi) +{ + return down_read_trylock(&sbi->cp_rwsem); +} + static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi) { up_read(&sbi->cp_rwsem); @@ -1153,7 +1459,7 @@ static inline int __get_cp_reason(struct f2fs_sb_info *sbi) static inline bool __remain_node_summaries(int reason) { - return (reason == CP_UMOUNT || reason == CP_FASTBOOT); + return (reason & (CP_UMOUNT | CP_FASTBOOT)); } static inline bool __exist_node_summaries(struct f2fs_sb_info *sbi) @@ -1174,17 +1480,14 @@ static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) return 0; } -#define F2FS_DEFAULT_ALLOCATED_BLOCKS 1 - /* * Check whether the inode has blocks or not */ static inline int F2FS_HAS_BLOCKS(struct inode *inode) { - if (F2FS_I(inode)->i_xattr_nid) - return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1; - else - return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS; + block_t xattr_block = F2FS_I(inode)->i_xattr_nid ? 1 : 0; + + return (inode->i_blocks >> F2FS_LOG_SECTORS_PER_BLOCK) > xattr_block; } static inline bool f2fs_has_xattr_block(unsigned int ofs) @@ -1192,15 +1495,24 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs) return ofs == XATTR_NODE_OFFSET; } -static inline void f2fs_i_blocks_write(struct inode *, blkcnt_t, bool); -static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, +static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool); +static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, blkcnt_t *count) { - blkcnt_t diff; + blkcnt_t diff = 0, release = 0; + block_t avail_user_block_count; + int ret; + + ret = dquot_reserve_block(inode, *count); + if (ret) + return ret; #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(sbi, FAULT_BLOCK)) - return false; + if (time_to_inject(sbi, FAULT_BLOCK)) { + f2fs_show_injection_info(FAULT_BLOCK); + release = *count; + goto enospc; + } #endif /* * let's increase this in prior to actual block count change in order @@ -1210,39 +1522,50 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, spin_lock(&sbi->stat_lock); sbi->total_valid_block_count += (block_t)(*count); - if (unlikely(sbi->total_valid_block_count > sbi->user_block_count)) { - diff = sbi->total_valid_block_count - sbi->user_block_count; + avail_user_block_count = sbi->user_block_count - sbi->reserved_blocks; + if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { + diff = sbi->total_valid_block_count - avail_user_block_count; *count -= diff; - sbi->total_valid_block_count = sbi->user_block_count; + release = diff; + sbi->total_valid_block_count = avail_user_block_count; if (!*count) { spin_unlock(&sbi->stat_lock); percpu_counter_sub(&sbi->alloc_valid_block_count, diff); - return false; + goto enospc; } } spin_unlock(&sbi->stat_lock); - f2fs_i_blocks_write(inode, *count, true); - return true; + if (release) + dquot_release_reservation_block(inode, release); + f2fs_i_blocks_write(inode, *count, true, true); + return 0; + +enospc: + dquot_release_reservation_block(inode, release); + return -ENOSPC; } static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, - blkcnt_t count) + block_t count) { + blkcnt_t sectors = count << F2FS_LOG_SECTORS_PER_BLOCK; + spin_lock(&sbi->stat_lock); f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count); - f2fs_bug_on(sbi, inode->i_blocks < count); + f2fs_bug_on(sbi, inode->i_blocks < sectors); sbi->total_valid_block_count -= (block_t)count; spin_unlock(&sbi->stat_lock); - f2fs_i_blocks_write(inode, count, false); + f2fs_i_blocks_write(inode, count, false, true); } static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) { atomic_inc(&sbi->nr_pages[count_type]); - if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES) + if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES || + count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA) return; set_sbi_flag(sbi, SBI_IS_DIRTY); @@ -1363,51 +1686,70 @@ static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi) return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); } -static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi, - struct inode *inode) +static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, + struct inode *inode, bool is_inode) { block_t valid_block_count; unsigned int valid_node_count; + bool quota = inode && !is_inode; + + if (quota) { + int ret = dquot_reserve_block(inode, 1); + if (ret) + return ret; + } spin_lock(&sbi->stat_lock); valid_block_count = sbi->total_valid_block_count + 1; - if (unlikely(valid_block_count > sbi->user_block_count)) { + if (unlikely(valid_block_count + sbi->reserved_blocks > + sbi->user_block_count)) { spin_unlock(&sbi->stat_lock); - return false; + goto enospc; } valid_node_count = sbi->total_valid_node_count + 1; if (unlikely(valid_node_count > sbi->total_node_count)) { spin_unlock(&sbi->stat_lock); - return false; + goto enospc; } - if (inode) - f2fs_i_blocks_write(inode, 1, true); - sbi->total_valid_node_count++; sbi->total_valid_block_count++; spin_unlock(&sbi->stat_lock); + if (inode) { + if (is_inode) + f2fs_mark_inode_dirty_sync(inode, true); + else + f2fs_i_blocks_write(inode, 1, true, true); + } + percpu_counter_inc(&sbi->alloc_valid_block_count); - return true; + return 0; + +enospc: + if (quota) + dquot_release_reservation_block(inode, 1); + return -ENOSPC; } static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, - struct inode *inode) + struct inode *inode, bool is_inode) { spin_lock(&sbi->stat_lock); f2fs_bug_on(sbi, !sbi->total_valid_block_count); f2fs_bug_on(sbi, !sbi->total_valid_node_count); - f2fs_bug_on(sbi, !inode->i_blocks); + f2fs_bug_on(sbi, !is_inode && !inode->i_blocks); - f2fs_i_blocks_write(inode, 1, false); sbi->total_valid_node_count--; sbi->total_valid_block_count--; spin_unlock(&sbi->stat_lock); + + if (!is_inode) + f2fs_i_blocks_write(inode, 1, false, true); } static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) @@ -1435,11 +1777,14 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, { #ifdef CONFIG_F2FS_FAULT_INJECTION struct page *page = find_lock_page(mapping, index); + if (page) return page; - if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) + if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) { + f2fs_show_injection_info(FAULT_PAGE_ALLOC); return NULL; + } #endif if (!for_write) return grab_cache_page(mapping, index); @@ -1518,22 +1863,42 @@ static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, static inline bool IS_INODE(struct page *page) { struct f2fs_node *p = F2FS_NODE(page); + return RAW_IS_INODE(p); } +static inline int offset_in_addr(struct f2fs_inode *i) +{ + return (i->i_inline & F2FS_EXTRA_ATTR) ? + (le16_to_cpu(i->i_extra_isize) / sizeof(__le32)) : 0; +} + static inline __le32 *blkaddr_in_node(struct f2fs_node *node) { return RAW_IS_INODE(node) ? node->i.i_addr : node->dn.addr; } -static inline block_t datablock_addr(struct page *node_page, - unsigned int offset) +static inline int f2fs_has_extra_attr(struct inode *inode); +static inline block_t datablock_addr(struct inode *inode, + struct page *node_page, unsigned int offset) { struct f2fs_node *raw_node; __le32 *addr_array; + int base = 0; + bool is_inode = IS_INODE(node_page); + raw_node = F2FS_NODE(node_page); + + /* from GC path only */ + if (!inode) { + if (is_inode) + base = offset_in_addr(&raw_node->i); + } else if (f2fs_has_extra_attr(inode) && is_inode) { + base = get_extra_isize(inode); + } + addr_array = blkaddr_in_node(raw_node); - return le32_to_cpu(addr_array[offset]); + return le32_to_cpu(addr_array[base + offset]); } static inline int f2fs_test_bit(unsigned int nr, char *addr) @@ -1596,6 +1961,20 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) *addr ^= mask; } +#define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL)) +#define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL) +#define F2FS_FL_INHERITED (FS_PROJINHERIT_FL) + +static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) +{ + if (S_ISDIR(mode)) + return flags; + else if (S_ISREG(mode)) + return flags & F2FS_REG_FLMASK; + else + return flags & F2FS_OTHER_FLMASK; +} + /* used for f2fs_inode_info->flags */ enum { FI_NEW_INODE, /* indicate newly allocated inode */ @@ -1614,6 +1993,7 @@ enum { FI_UPDATE_WRITE, /* inode has in-place-update data */ FI_NEED_IPU, /* used for ipu per file */ FI_ATOMIC_FILE, /* indicate atomic file */ + FI_ATOMIC_COMMIT, /* indicate the state of atomical committing */ FI_VOLATILE_FILE, /* indicate volatile file */ FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ FI_DROP_CACHE, /* drop dirty page cache */ @@ -1621,6 +2001,10 @@ enum { FI_INLINE_DOTS, /* indicate inline dot dentries */ FI_DO_DEFRAG, /* indicate defragment is running */ FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ + FI_NO_PREALLOC, /* indicate skipped preallocated blocks */ + FI_HOT_DATA, /* indicate file is hot */ + FI_EXTRA_ATTR, /* indicate file has extra attribute */ + FI_PROJ_INHERIT, /* indicate file inherits projectid */ }; static inline void __mark_inode_dirty_flag(struct inode *inode, @@ -1634,7 +2018,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, return; case FI_DATA_EXIST: case FI_INLINE_DOTS: - f2fs_mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode, true); } } @@ -1661,7 +2045,7 @@ static inline void set_acl_inode(struct inode *inode, umode_t mode) { F2FS_I(inode)->i_acl_mode = mode; set_inode_flag(inode, FI_ACL_MODE); - f2fs_mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode, false); } static inline void f2fs_i_links_write(struct inode *inode, bool inc) @@ -1670,18 +2054,26 @@ static inline void f2fs_i_links_write(struct inode *inode, bool inc) inc_nlink(inode); else drop_nlink(inode); - f2fs_mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode, true); } static inline void f2fs_i_blocks_write(struct inode *inode, - blkcnt_t diff, bool add) + block_t diff, bool add, bool claim) { bool clean = !is_inode_flag_set(inode, FI_DIRTY_INODE); bool recover = is_inode_flag_set(inode, FI_AUTO_RECOVER); - inode->i_blocks = add ? inode->i_blocks + diff : - inode->i_blocks - diff; - f2fs_mark_inode_dirty_sync(inode); + /* add = 1, claim = 1 should be dquot_reserve_block in pair */ + if (add) { + if (claim) + dquot_claim_block(inode, diff); + else + dquot_alloc_block_nofail(inode, diff); + } else { + dquot_free_block(inode, diff); + } + + f2fs_mark_inode_dirty_sync(inode, true); if (clean || recover) set_inode_flag(inode, FI_AUTO_RECOVER); } @@ -1695,34 +2087,27 @@ static inline void f2fs_i_size_write(struct inode *inode, loff_t i_size) return; i_size_write(inode, i_size); - f2fs_mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode, true); if (clean || recover) set_inode_flag(inode, FI_AUTO_RECOVER); } -static inline bool f2fs_skip_inode_update(struct inode *inode) -{ - if (!is_inode_flag_set(inode, FI_AUTO_RECOVER)) - return false; - return F2FS_I(inode)->last_disk_size == i_size_read(inode); -} - static inline void f2fs_i_depth_write(struct inode *inode, unsigned int depth) { F2FS_I(inode)->i_current_depth = depth; - f2fs_mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode, true); } static inline void f2fs_i_xnid_write(struct inode *inode, nid_t xnid) { F2FS_I(inode)->i_xattr_nid = xnid; - f2fs_mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode, true); } static inline void f2fs_i_pino_write(struct inode *inode, nid_t pino) { F2FS_I(inode)->i_pino = pino; - f2fs_mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode, true); } static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri) @@ -1739,6 +2124,8 @@ static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri) set_bit(FI_DATA_EXIST, &fi->flags); if (ri->i_inline & F2FS_INLINE_DOTS) set_bit(FI_INLINE_DOTS, &fi->flags); + if (ri->i_inline & F2FS_EXTRA_ATTR) + set_bit(FI_EXTRA_ATTR, &fi->flags); } static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) @@ -1755,6 +2142,13 @@ static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) ri->i_inline |= F2FS_DATA_EXIST; if (is_inode_flag_set(inode, FI_INLINE_DOTS)) ri->i_inline |= F2FS_INLINE_DOTS; + if (is_inode_flag_set(inode, FI_EXTRA_ATTR)) + ri->i_inline |= F2FS_EXTRA_ATTR; +} + +static inline int f2fs_has_extra_attr(struct inode *inode) +{ + return is_inode_flag_set(inode, FI_EXTRA_ATTR); } static inline int f2fs_has_inline_xattr(struct inode *inode) @@ -1765,13 +2159,14 @@ static inline int f2fs_has_inline_xattr(struct inode *inode) static inline unsigned int addrs_per_inode(struct inode *inode) { if (f2fs_has_inline_xattr(inode)) - return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS; - return DEF_ADDRS_PER_INODE; + return CUR_ADDRS_PER_INODE(inode) - F2FS_INLINE_XATTR_ADDRS; + return CUR_ADDRS_PER_INODE(inode); } static inline void *inline_xattr_addr(struct page *page) { struct f2fs_inode *ri = F2FS_INODE(page); + return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS]); } @@ -1789,12 +2184,6 @@ static inline int f2fs_has_inline_data(struct inode *inode) return is_inode_flag_set(inode, FI_INLINE_DATA); } -static inline void f2fs_clear_inline_inode(struct inode *inode) -{ - clear_inode_flag(inode, FI_INLINE_DATA); - clear_inode_flag(inode, FI_DATA_EXIST); -} - static inline int f2fs_exist_data(struct inode *inode) { return is_inode_flag_set(inode, FI_DATA_EXIST); @@ -1810,6 +2199,11 @@ static inline bool f2fs_is_atomic_file(struct inode *inode) return is_inode_flag_set(inode, FI_ATOMIC_FILE); } +static inline bool f2fs_is_commit_atomic_write(struct inode *inode) +{ + return is_inode_flag_set(inode, FI_ATOMIC_COMMIT); +} + static inline bool f2fs_is_volatile_file(struct inode *inode) { return is_inode_flag_set(inode, FI_VOLATILE_FILE); @@ -1825,10 +2219,12 @@ static inline bool f2fs_is_drop_cache(struct inode *inode) return is_inode_flag_set(inode, FI_DROP_CACHE); } -static inline void *inline_data_addr(struct page *page) +static inline void *inline_data_addr(struct inode *inode, struct page *page) { struct f2fs_inode *ri = F2FS_INODE(page); - return (void *)&(ri->i_addr[1]); + int extra_size = get_extra_isize(inode); + + return (void *)&(ri->i_addr[extra_size + DEF_INLINE_RESERVED_SIZE]); } static inline int f2fs_has_inline_dentry(struct inode *inode) @@ -1850,13 +2246,31 @@ static inline int is_file(struct inode *inode, int type) static inline void set_file(struct inode *inode, int type) { F2FS_I(inode)->i_advise |= type; - f2fs_mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode, true); } static inline void clear_file(struct inode *inode, int type) { F2FS_I(inode)->i_advise &= ~type; - f2fs_mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode, true); +} + +static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) +{ + if (dsync) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + bool ret; + + spin_lock(&sbi->inode_lock[DIRTY_META]); + ret = list_empty(&F2FS_I(inode)->gdirty_list); + spin_unlock(&sbi->inode_lock[DIRTY_META]); + return ret; + } + if (!is_inode_flag_set(inode, FI_AUTO_RECOVER) || + file_keep_isize(inode) || + i_size_read(inode) & PAGE_MASK) + return false; + return F2FS_I(inode)->last_disk_size == i_size_read(inode); } static inline int f2fs_readonly(struct super_block *sb) @@ -1893,13 +2307,15 @@ static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(sbi, FAULT_KMALLOC)) + if (time_to_inject(sbi, FAULT_KMALLOC)) { + f2fs_show_injection_info(FAULT_KMALLOC); return NULL; + } #endif return kmalloc(size, flags); } -static inline void *f2fs_kvmalloc(size_t size, gfp_t flags) +static inline void *kvmalloc(size_t size, gfp_t flags) { void *ret; @@ -1909,7 +2325,7 @@ static inline void *f2fs_kvmalloc(size_t size, gfp_t flags) return ret; } -static inline void *f2fs_kvzalloc(size_t size, gfp_t flags) +static inline void *kvzalloc(size_t size, gfp_t flags) { void *ret; @@ -1919,42 +2335,89 @@ static inline void *f2fs_kvzalloc(size_t size, gfp_t flags) return ret; } +static inline int wbc_to_write_flags(struct writeback_control *wbc) +{ + if (wbc->sync_mode == WB_SYNC_ALL) + return REQ_SYNC; + else if (wbc->for_kupdate || wbc->for_background) + return 0; + + return 0; +} + +static inline int get_extra_isize(struct inode *inode) +{ + return F2FS_I(inode)->i_extra_isize / sizeof(__le32); +} + #define get_inode_mode(i) \ ((is_inode_flag_set(i, FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) -/* get offset of first page in next direct node */ -#define PGOFS_OF_NEXT_DNODE(pgofs, inode) \ - ((pgofs < ADDRS_PER_INODE(inode)) ? ADDRS_PER_INODE(inode) : \ - (pgofs - ADDRS_PER_INODE(inode) + ADDRS_PER_BLOCK) / \ - ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode)) +#define F2FS_TOTAL_EXTRA_ATTR_SIZE \ + (offsetof(struct f2fs_inode, i_extra_end) - \ + offsetof(struct f2fs_inode, i_extra_isize)) \ + +#define F2FS_OLD_ATTRIBUTE_SIZE (offsetof(struct f2fs_inode, i_addr)) +#define F2FS_FITS_IN_INODE(f2fs_inode, extra_isize, field) \ + ((offsetof(typeof(*f2fs_inode), field) + \ + sizeof((f2fs_inode)->field)) \ + <= (F2FS_OLD_ATTRIBUTE_SIZE + extra_isize)) \ + +static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi) +{ + int i; + + spin_lock(&sbi->iostat_lock); + for (i = 0; i < NR_IO_TYPE; i++) + sbi->write_iostat[i] = 0; + spin_unlock(&sbi->iostat_lock); +} + +static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, + enum iostat_type type, unsigned long long io_bytes) +{ + if (!sbi->iostat_enable) + return; + spin_lock(&sbi->iostat_lock); + sbi->write_iostat[type] += io_bytes; + + if (type == APP_WRITE_IO || type == APP_DIRECT_IO) + sbi->write_iostat[APP_BUFFERED_IO] = + sbi->write_iostat[APP_WRITE_IO] - + sbi->write_iostat[APP_DIRECT_IO]; + spin_unlock(&sbi->iostat_lock); +} /* * file.c */ -int f2fs_sync_file(struct file *, loff_t, loff_t, int); -void truncate_data_blocks(struct dnode_of_data *); -int truncate_blocks(struct inode *, u64, bool); -int f2fs_truncate(struct inode *); -int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); -int f2fs_setattr(struct dentry *, struct iattr *); -int truncate_hole(struct inode *, pgoff_t, pgoff_t); -int truncate_data_blocks_range(struct dnode_of_data *, int); -long f2fs_ioctl(struct file *, unsigned int, unsigned long); -long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long); +int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); +void truncate_data_blocks(struct dnode_of_data *dn); +int truncate_blocks(struct inode *inode, u64 from, bool lock); +int f2fs_truncate(struct inode *inode); +int f2fs_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat); +int f2fs_setattr(struct dentry *dentry, struct iattr *attr); +int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end); +int truncate_data_blocks_range(struct dnode_of_data *dn, int count); +long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); +long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); /* * inode.c */ -void f2fs_set_inode_flags(struct inode *); -struct inode *f2fs_iget(struct super_block *, unsigned long); -struct inode *f2fs_iget_retry(struct super_block *, unsigned long); -int try_to_free_nats(struct f2fs_sb_info *, int); -int update_inode(struct inode *, struct page *); -int update_inode_page(struct inode *); -int f2fs_write_inode(struct inode *, struct writeback_control *); -void f2fs_evict_inode(struct inode *); -void handle_failed_inode(struct inode *); +void f2fs_set_inode_flags(struct inode *inode); +bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page); +void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page); +struct inode *f2fs_iget(struct super_block *sb, unsigned long ino); +struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino); +int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink); +int update_inode(struct inode *inode, struct page *node_page); +int update_inode_page(struct inode *inode); +int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc); +void f2fs_evict_inode(struct inode *inode); +void handle_failed_inode(struct inode *inode); /* * namei.c @@ -1964,40 +2427,45 @@ struct dentry *f2fs_get_parent(struct dentry *child); /* * dir.c */ -void set_de_type(struct f2fs_dir_entry *, umode_t); -unsigned char get_de_type(struct f2fs_dir_entry *); -struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *, - f2fs_hash_t, int *, struct f2fs_dentry_ptr *); -bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *, - unsigned int, struct fscrypt_str *); -void do_make_empty_dir(struct inode *, struct inode *, - struct f2fs_dentry_ptr *); -struct page *init_inode_metadata(struct inode *, struct inode *, - const struct qstr *, const struct qstr *, struct page *); -void update_parent_metadata(struct inode *, struct inode *, unsigned int); -int room_for_filename(const void *, int, int); -void f2fs_drop_nlink(struct inode *, struct inode *); -struct f2fs_dir_entry *__f2fs_find_entry(struct inode *, struct fscrypt_name *, - struct page **); -struct f2fs_dir_entry *f2fs_find_entry(struct inode *, const struct qstr *, - struct page **); -struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); -ino_t f2fs_inode_by_name(struct inode *, const struct qstr *, struct page **); -void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, - struct page *, struct inode *); -int update_dent_inode(struct inode *, struct inode *, const struct qstr *); -void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *, - const struct qstr *, f2fs_hash_t , unsigned int); -int f2fs_add_regular_entry(struct inode *, const struct qstr *, - const struct qstr *, struct inode *, nid_t, umode_t); -int __f2fs_do_add_link(struct inode *, struct fscrypt_name*, struct inode *, - nid_t, umode_t); -int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t, - umode_t); -void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, - struct inode *); -int f2fs_do_tmpfile(struct inode *, struct inode *); -bool f2fs_empty_dir(struct inode *); +void set_de_type(struct f2fs_dir_entry *de, umode_t mode); +unsigned char get_de_type(struct f2fs_dir_entry *de); +struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname, + f2fs_hash_t namehash, int *max_slots, + struct f2fs_dentry_ptr *d); +int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, + unsigned int start_pos, struct fscrypt_str *fstr); +void do_make_empty_dir(struct inode *inode, struct inode *parent, + struct f2fs_dentry_ptr *d); +struct page *init_inode_metadata(struct inode *inode, struct inode *dir, + const struct qstr *new_name, + const struct qstr *orig_name, struct page *dpage); +void update_parent_metadata(struct inode *dir, struct inode *inode, + unsigned int current_depth); +int room_for_filename(const void *bitmap, int slots, int max_slots); +void f2fs_drop_nlink(struct inode *dir, struct inode *inode); +struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, + struct fscrypt_name *fname, struct page **res_page); +struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, + const struct qstr *child, struct page **res_page); +struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p); +ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr, + struct page **page); +void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, + struct page *page, struct inode *inode); +void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, + const struct qstr *name, f2fs_hash_t name_hash, + unsigned int bit_pos); +int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, + const struct qstr *orig_name, + struct inode *inode, nid_t ino, umode_t mode); +int __f2fs_do_add_link(struct inode *dir, struct fscrypt_name *fname, + struct inode *inode, nid_t ino, umode_t mode); +int __f2fs_add_link(struct inode *dir, const struct qstr *name, + struct inode *inode, nid_t ino, umode_t mode); +void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, + struct inode *dir, struct inode *inode); +int f2fs_do_tmpfile(struct inode *inode, struct inode *dir); +bool f2fs_empty_dir(struct inode *dir); static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) { @@ -2008,12 +2476,14 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) /* * super.c */ -int f2fs_inode_dirtied(struct inode *); -void f2fs_inode_synced(struct inode *); -int f2fs_commit_super(struct f2fs_sb_info *, bool); -int f2fs_sync_fs(struct super_block *, int); +int f2fs_inode_dirtied(struct inode *inode, bool sync); +void f2fs_inode_synced(struct inode *inode); +void f2fs_enable_quota_files(struct f2fs_sb_info *sbi); +void f2fs_quota_off_umount(struct super_block *sb); +int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); +int f2fs_sync_fs(struct super_block *sb, int sync); extern __printf(3, 4) -void f2fs_msg(struct super_block *, const char *, const char *, ...); +void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...); int sanity_check_ckpt(struct f2fs_sb_info *sbi); /* @@ -2028,160 +2498,190 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info, struct dnode_of_data; struct node_info; -bool available_free_memory(struct f2fs_sb_info *, int); -int need_dentry_mark(struct f2fs_sb_info *, nid_t); -bool is_checkpointed_node(struct f2fs_sb_info *, nid_t); -bool need_inode_block_update(struct f2fs_sb_info *, nid_t); -void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); -pgoff_t get_next_page_offset(struct dnode_of_data *, pgoff_t); -int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); -int truncate_inode_blocks(struct inode *, pgoff_t); -int truncate_xattr_node(struct inode *, struct page *); -int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t); -int remove_inode_page(struct inode *); -struct page *new_inode_page(struct inode *); -struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); -void ra_node_page(struct f2fs_sb_info *, nid_t); -struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); -struct page *get_node_page_ra(struct page *, int); -void move_node_page(struct page *, int); -int fsync_node_pages(struct f2fs_sb_info *, struct inode *, - struct writeback_control *, bool); -int sync_node_pages(struct f2fs_sb_info *, struct writeback_control *); -void build_free_nids(struct f2fs_sb_info *); -bool alloc_nid(struct f2fs_sb_info *, nid_t *); -void alloc_nid_done(struct f2fs_sb_info *, nid_t); -void alloc_nid_failed(struct f2fs_sb_info *, nid_t); -int try_to_free_nids(struct f2fs_sb_info *, int); -void recover_inline_xattr(struct inode *, struct page *); -void recover_xattr_data(struct inode *, struct page *, block_t); -int recover_inode_page(struct f2fs_sb_info *, struct page *); -int restore_node_summary(struct f2fs_sb_info *, unsigned int, - struct f2fs_summary_block *); -void flush_nat_entries(struct f2fs_sb_info *); -int build_node_manager(struct f2fs_sb_info *); -void destroy_node_manager(struct f2fs_sb_info *); +bool available_free_memory(struct f2fs_sb_info *sbi, int type); +int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid); +bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid); +bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino); +void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni); +pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs); +int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode); +int truncate_inode_blocks(struct inode *inode, pgoff_t from); +int truncate_xattr_node(struct inode *inode, struct page *page); +int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino); +int remove_inode_page(struct inode *inode); +struct page *new_inode_page(struct inode *inode); +struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs); +void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid); +struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid); +struct page *get_node_page_ra(struct page *parent, int start); +void move_node_page(struct page *node_page, int gc_type); +int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, + struct writeback_control *wbc, bool atomic); +int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc, + bool do_balance, enum iostat_type io_type); +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount); +bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid); +void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid); +void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid); +int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink); +void recover_inline_xattr(struct inode *inode, struct page *page); +int recover_xattr_data(struct inode *inode, struct page *page, + block_t blkaddr); +int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page); +int restore_node_summary(struct f2fs_sb_info *sbi, + unsigned int segno, struct f2fs_summary_block *sum); +void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); +int build_node_manager(struct f2fs_sb_info *sbi); +void destroy_node_manager(struct f2fs_sb_info *sbi); int __init create_node_manager_caches(void); void destroy_node_manager_caches(void); /* * segment.c */ -void register_inmem_page(struct inode *, struct page *); -void drop_inmem_pages(struct inode *); -int commit_inmem_pages(struct inode *); -void f2fs_balance_fs(struct f2fs_sb_info *, bool); -void f2fs_balance_fs_bg(struct f2fs_sb_info *); -int f2fs_issue_flush(struct f2fs_sb_info *); -int create_flush_cmd_control(struct f2fs_sb_info *); -void destroy_flush_cmd_control(struct f2fs_sb_info *); -void invalidate_blocks(struct f2fs_sb_info *, block_t); -bool is_checkpointed_data(struct f2fs_sb_info *, block_t); -void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *); -void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *); -void release_discard_addrs(struct f2fs_sb_info *); -int npages_for_summary_flush(struct f2fs_sb_info *, bool); -void allocate_new_segments(struct f2fs_sb_info *); -int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); -struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); -void update_meta_page(struct f2fs_sb_info *, void *, block_t); -void write_meta_page(struct f2fs_sb_info *, struct page *); -void write_node_page(unsigned int, struct f2fs_io_info *); -void write_data_page(struct dnode_of_data *, struct f2fs_io_info *); -void rewrite_data_page(struct f2fs_io_info *); -void __f2fs_replace_block(struct f2fs_sb_info *, struct f2fs_summary *, - block_t, block_t, bool, bool); -void f2fs_replace_block(struct f2fs_sb_info *, struct dnode_of_data *, - block_t, block_t, unsigned char, bool, bool); -void allocate_data_block(struct f2fs_sb_info *, struct page *, - block_t, block_t *, struct f2fs_summary *, int); -void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool); -void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *, block_t); -void write_data_summaries(struct f2fs_sb_info *, block_t); -void write_node_summaries(struct f2fs_sb_info *, block_t); -int lookup_journal_in_cursum(struct f2fs_journal *, int, unsigned int, int); -void flush_sit_entries(struct f2fs_sb_info *, struct cp_control *); -int build_segment_manager(struct f2fs_sb_info *); -void destroy_segment_manager(struct f2fs_sb_info *); +bool need_SSR(struct f2fs_sb_info *sbi); +void register_inmem_page(struct inode *inode, struct page *page); +void drop_inmem_pages(struct inode *inode); +void drop_inmem_page(struct inode *inode, struct page *page); +int commit_inmem_pages(struct inode *inode); +void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need); +void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi); +int f2fs_issue_flush(struct f2fs_sb_info *sbi); +int create_flush_cmd_control(struct f2fs_sb_info *sbi); +void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free); +void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); +bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); +void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new); +void stop_discard_thread(struct f2fs_sb_info *sbi); +void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount); +void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); +void release_discard_addrs(struct f2fs_sb_info *sbi); +int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); +void allocate_new_segments(struct f2fs_sb_info *sbi); +int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); +bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc); +struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno); +void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr); +void write_meta_page(struct f2fs_sb_info *sbi, struct page *page, + enum iostat_type io_type); +void write_node_page(unsigned int nid, struct f2fs_io_info *fio); +void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio); +int rewrite_data_page(struct f2fs_io_info *fio); +void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, + block_t old_blkaddr, block_t new_blkaddr, + bool recover_curseg, bool recover_newaddr); +void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, + block_t old_addr, block_t new_addr, + unsigned char version, bool recover_curseg, + bool recover_newaddr); +void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, + block_t old_blkaddr, block_t *new_blkaddr, + struct f2fs_summary *sum, int type, + struct f2fs_io_info *fio, bool add_list); +void f2fs_wait_on_page_writeback(struct page *page, + enum page_type type, bool ordered); +void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr); +void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk); +void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk); +int lookup_journal_in_cursum(struct f2fs_journal *journal, int type, + unsigned int val, int alloc); +void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); +int build_segment_manager(struct f2fs_sb_info *sbi); +void destroy_segment_manager(struct f2fs_sb_info *sbi); int __init create_segment_manager_caches(void); void destroy_segment_manager_caches(void); /* * checkpoint.c */ -void f2fs_stop_checkpoint(struct f2fs_sb_info *, bool); -struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); -struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); -struct page *get_tmp_page(struct f2fs_sb_info *, pgoff_t); -bool is_valid_blkaddr(struct f2fs_sb_info *, block_t, int); -int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int, bool); -void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t); -long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); -void add_ino_entry(struct f2fs_sb_info *, nid_t, int type); -void remove_ino_entry(struct f2fs_sb_info *, nid_t, int type); -void release_ino_entry(struct f2fs_sb_info *, bool); -bool exist_written_data(struct f2fs_sb_info *, nid_t, int); -int f2fs_sync_inode_meta(struct f2fs_sb_info *); -int acquire_orphan_inode(struct f2fs_sb_info *); -void release_orphan_inode(struct f2fs_sb_info *); -void add_orphan_inode(struct inode *); -void remove_orphan_inode(struct f2fs_sb_info *, nid_t); -int recover_orphan_inodes(struct f2fs_sb_info *); -int get_valid_checkpoint(struct f2fs_sb_info *); -void update_dirty_page(struct inode *, struct page *); -void remove_dirty_inode(struct inode *); -int sync_dirty_inodes(struct f2fs_sb_info *, enum inode_type); -int write_checkpoint(struct f2fs_sb_info *, struct cp_control *); -void init_ino_entry_info(struct f2fs_sb_info *); +void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io); +struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); +struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); +struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index); +bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); +int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, + int type, bool sync); +void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index); +long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, + long nr_to_write, enum iostat_type io_type); +void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); +void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); +void release_ino_entry(struct f2fs_sb_info *sbi, bool all); +bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode); +int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi); +int acquire_orphan_inode(struct f2fs_sb_info *sbi); +void release_orphan_inode(struct f2fs_sb_info *sbi); +void add_orphan_inode(struct inode *inode); +void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino); +int recover_orphan_inodes(struct f2fs_sb_info *sbi); +int get_valid_checkpoint(struct f2fs_sb_info *sbi); +void update_dirty_page(struct inode *inode, struct page *page); +void remove_dirty_inode(struct inode *inode); +int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type); +int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc); +void init_ino_entry_info(struct f2fs_sb_info *sbi); int __init create_checkpoint_caches(void); void destroy_checkpoint_caches(void); /* * data.c */ -void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int); -void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *, struct inode *, - struct page *, nid_t, enum page_type, int); -void f2fs_flush_merged_bios(struct f2fs_sb_info *); -int f2fs_submit_page_bio(struct f2fs_io_info *); -void f2fs_submit_page_mbio(struct f2fs_io_info *); -void set_data_blkaddr(struct dnode_of_data *); -void f2fs_update_data_blkaddr(struct dnode_of_data *, block_t); -int reserve_new_blocks(struct dnode_of_data *, blkcnt_t); -int reserve_new_block(struct dnode_of_data *); -int f2fs_get_block(struct dnode_of_data *, pgoff_t); -ssize_t f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *); -int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); -struct page *get_read_data_page(struct inode *, pgoff_t, int, bool); -struct page *find_data_page(struct inode *, pgoff_t); -struct page *get_lock_data_page(struct inode *, pgoff_t, bool); -struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); -int do_write_data_page(struct f2fs_io_info *); -int f2fs_map_blocks(struct inode *, struct f2fs_map_blocks *, int, int); -int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); -void f2fs_set_page_dirty_nobuffers(struct page *); -void f2fs_invalidate_page(struct page *, unsigned int, unsigned int); -int f2fs_release_page(struct page *, gfp_t); +void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type); +void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, + struct inode *inode, nid_t ino, pgoff_t idx, + enum page_type type); +void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi); +int f2fs_submit_page_bio(struct f2fs_io_info *fio); +int f2fs_submit_page_write(struct f2fs_io_info *fio); +struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, + block_t blk_addr, struct bio *bio); +int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr); +void set_data_blkaddr(struct dnode_of_data *dn); +void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr); +int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count); +int reserve_new_block(struct dnode_of_data *dn); +int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index); +int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from); +int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index); +struct page *get_read_data_page(struct inode *inode, pgoff_t index, + int op_flags, bool for_write); +struct page *find_data_page(struct inode *inode, pgoff_t index); +struct page *get_lock_data_page(struct inode *inode, pgoff_t index, + bool for_write); +struct page *get_new_data_page(struct inode *inode, + struct page *ipage, pgoff_t index, bool new_i_size); +int do_write_data_page(struct f2fs_io_info *fio); +int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, + int create, int flag); +int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len); +void f2fs_set_page_dirty_nobuffers(struct page *page); +int __f2fs_write_data_pages(struct address_space *mapping, + struct writeback_control *wbc, + enum iostat_type io_type); +void f2fs_invalidate_page(struct page *page, unsigned int offset, + unsigned int length); +int f2fs_release_page(struct page *page, gfp_t wait); #ifdef CONFIG_MIGRATION -int f2fs_migrate_page(struct address_space *, struct page *, struct page *, - enum migrate_mode); +int f2fs_migrate_page(struct address_space *mapping, struct page *newpage, + struct page *page, enum migrate_mode mode); #endif /* * gc.c */ -int start_gc_thread(struct f2fs_sb_info *); -void stop_gc_thread(struct f2fs_sb_info *); -block_t start_bidx_of_node(unsigned int, struct inode *); -int f2fs_gc(struct f2fs_sb_info *, bool); -void build_gc_manager(struct f2fs_sb_info *); +int start_gc_thread(struct f2fs_sb_info *sbi); +void stop_gc_thread(struct f2fs_sb_info *sbi); +block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode); +int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, + unsigned int segno); +void build_gc_manager(struct f2fs_sb_info *sbi); /* * recovery.c */ -int recover_fsync_data(struct f2fs_sb_info *, bool); -bool space_for_roll_forward(struct f2fs_sb_info *); +int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only); +bool space_for_roll_forward(struct f2fs_sb_info *sbi); /* * debug.c @@ -2198,10 +2698,15 @@ struct f2fs_stat_info { int ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta; int inmem_pages; unsigned int ndirty_dirs, ndirty_files, ndirty_all; - int nats, dirty_nats, sits, dirty_sits, fnids; + int nats, dirty_nats, sits, dirty_sits; + int free_nids, avail_nids, alloc_nids; int total_count, utilization; - int bg_gc, wb_bios; - int inline_xattr, inline_inode, inline_dir, orphans; + int bg_gc, nr_wb_cp_data, nr_wb_data; + int nr_flushing, nr_flushed, nr_discarding, nr_discarded; + int nr_discard_cmd; + unsigned int undiscard_blks; + int inline_xattr, inline_inode, inline_dir, append, update, orphans; + int aw_cnt, max_aw_cnt, vw_cnt, max_vw_cnt; unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks; unsigned int bimodal, avg_vblocks; int util_free, util_valid, util_invalid; @@ -2273,11 +2778,33 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) ((sbi)->block_count[(curseg)->alloc_type]++) #define stat_inc_inplace_blocks(sbi) \ (atomic_inc(&(sbi)->inplace_count)) +#define stat_inc_atomic_write(inode) \ + (atomic_inc(&F2FS_I_SB(inode)->aw_cnt)) +#define stat_dec_atomic_write(inode) \ + (atomic_dec(&F2FS_I_SB(inode)->aw_cnt)) +#define stat_update_max_atomic_write(inode) \ + do { \ + int cur = atomic_read(&F2FS_I_SB(inode)->aw_cnt); \ + int max = atomic_read(&F2FS_I_SB(inode)->max_aw_cnt); \ + if (cur > max) \ + atomic_set(&F2FS_I_SB(inode)->max_aw_cnt, cur); \ + } while (0) +#define stat_inc_volatile_write(inode) \ + (atomic_inc(&F2FS_I_SB(inode)->vw_cnt)) +#define stat_dec_volatile_write(inode) \ + (atomic_dec(&F2FS_I_SB(inode)->vw_cnt)) +#define stat_update_max_volatile_write(inode) \ + do { \ + int cur = atomic_read(&F2FS_I_SB(inode)->vw_cnt); \ + int max = atomic_read(&F2FS_I_SB(inode)->max_vw_cnt); \ + if (cur > max) \ + atomic_set(&F2FS_I_SB(inode)->max_vw_cnt, cur); \ + } while (0) #define stat_inc_seg_count(sbi, type, gc_type) \ do { \ struct f2fs_stat_info *si = F2FS_STAT(sbi); \ - (si)->tot_segs++; \ - if (type == SUM_TYPE_DATA) { \ + si->tot_segs++; \ + if ((type) == SUM_TYPE_DATA) { \ si->data_segs++; \ si->bg_data_segs += (gc_type == BG_GC) ? 1 : 0; \ } else { \ @@ -2287,14 +2814,14 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) } while (0) #define stat_inc_tot_blk_count(si, blks) \ - (si->tot_blks += (blks)) + ((si)->tot_blks += (blks)) #define stat_inc_data_blk_count(sbi, blks, gc_type) \ do { \ struct f2fs_stat_info *si = F2FS_STAT(sbi); \ stat_inc_tot_blk_count(si, blks); \ si->data_blks += (blks); \ - si->bg_data_blks += (gc_type == BG_GC) ? (blks) : 0; \ + si->bg_data_blks += ((gc_type) == BG_GC) ? (blks) : 0; \ } while (0) #define stat_inc_node_blk_count(sbi, blks, gc_type) \ @@ -2302,37 +2829,43 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) struct f2fs_stat_info *si = F2FS_STAT(sbi); \ stat_inc_tot_blk_count(si, blks); \ si->node_blks += (blks); \ - si->bg_node_blks += (gc_type == BG_GC) ? (blks) : 0; \ + si->bg_node_blks += ((gc_type) == BG_GC) ? (blks) : 0; \ } while (0) -int f2fs_build_stats(struct f2fs_sb_info *); -void f2fs_destroy_stats(struct f2fs_sb_info *); +int f2fs_build_stats(struct f2fs_sb_info *sbi); +void f2fs_destroy_stats(struct f2fs_sb_info *sbi); int __init f2fs_create_root_stats(void); void f2fs_destroy_root_stats(void); #else -#define stat_inc_cp_count(si) -#define stat_inc_bg_cp_count(si) -#define stat_inc_call_count(si) -#define stat_inc_bggc_count(si) -#define stat_inc_dirty_inode(sbi, type) -#define stat_dec_dirty_inode(sbi, type) -#define stat_inc_total_hit(sb) -#define stat_inc_rbtree_node_hit(sb) -#define stat_inc_largest_node_hit(sbi) -#define stat_inc_cached_node_hit(sbi) -#define stat_inc_inline_xattr(inode) -#define stat_dec_inline_xattr(inode) -#define stat_inc_inline_inode(inode) -#define stat_dec_inline_inode(inode) -#define stat_inc_inline_dir(inode) -#define stat_dec_inline_dir(inode) -#define stat_inc_seg_type(sbi, curseg) -#define stat_inc_block_count(sbi, curseg) -#define stat_inc_inplace_blocks(sbi) -#define stat_inc_seg_count(sbi, type, gc_type) -#define stat_inc_tot_blk_count(si, blks) -#define stat_inc_data_blk_count(sbi, blks, gc_type) -#define stat_inc_node_blk_count(sbi, blks, gc_type) +#define stat_inc_cp_count(si) do { } while (0) +#define stat_inc_bg_cp_count(si) do { } while (0) +#define stat_inc_call_count(si) do { } while (0) +#define stat_inc_bggc_count(si) do { } while (0) +#define stat_inc_dirty_inode(sbi, type) do { } while (0) +#define stat_dec_dirty_inode(sbi, type) do { } while (0) +#define stat_inc_total_hit(sb) do { } while (0) +#define stat_inc_rbtree_node_hit(sb) do { } while (0) +#define stat_inc_largest_node_hit(sbi) do { } while (0) +#define stat_inc_cached_node_hit(sbi) do { } while (0) +#define stat_inc_inline_xattr(inode) do { } while (0) +#define stat_dec_inline_xattr(inode) do { } while (0) +#define stat_inc_inline_inode(inode) do { } while (0) +#define stat_dec_inline_inode(inode) do { } while (0) +#define stat_inc_inline_dir(inode) do { } while (0) +#define stat_dec_inline_dir(inode) do { } while (0) +#define stat_inc_atomic_write(inode) do { } while (0) +#define stat_dec_atomic_write(inode) do { } while (0) +#define stat_update_max_atomic_write(inode) do { } while (0) +#define stat_inc_volatile_write(inode) do { } while (0) +#define stat_dec_volatile_write(inode) do { } while (0) +#define stat_update_max_volatile_write(inode) do { } while (0) +#define stat_inc_seg_type(sbi, curseg) do { } while (0) +#define stat_inc_block_count(sbi, curseg) do { } while (0) +#define stat_inc_inplace_blocks(sbi) do { } while (0) +#define stat_inc_seg_count(sbi, type, gc_type) do { } while (0) +#define stat_inc_tot_blk_count(si, blks) do { } while (0) +#define stat_inc_data_blk_count(sbi, blks, gc_type) do { } while (0) +#define stat_inc_node_blk_count(sbi, blks, gc_type) do { } while (0) static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } @@ -2355,52 +2888,78 @@ extern struct kmem_cache *inode_entry_slab; /* * inline.c */ -bool f2fs_may_inline_data(struct inode *); -bool f2fs_may_inline_dentry(struct inode *); -void read_inline_data(struct page *, struct page *); -bool truncate_inline_inode(struct page *, u64); -int f2fs_read_inline_data(struct inode *, struct page *); -int f2fs_convert_inline_page(struct dnode_of_data *, struct page *); -int f2fs_convert_inline_inode(struct inode *); -int f2fs_write_inline_data(struct inode *, struct page *); -bool recover_inline_data(struct inode *, struct page *); -struct f2fs_dir_entry *find_in_inline_dir(struct inode *, - struct fscrypt_name *, struct page **); -int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); -int f2fs_add_inline_entry(struct inode *, const struct qstr *, - const struct qstr *, struct inode *, nid_t, umode_t); -void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, - struct inode *, struct inode *); -bool f2fs_empty_inline_dir(struct inode *); -int f2fs_read_inline_dir(struct file *, struct dir_context *, - struct fscrypt_str *); -int f2fs_inline_data_fiemap(struct inode *, - struct fiemap_extent_info *, __u64, __u64); +bool f2fs_may_inline_data(struct inode *inode); +bool f2fs_may_inline_dentry(struct inode *inode); +void read_inline_data(struct page *page, struct page *ipage); +void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from); +int f2fs_read_inline_data(struct inode *inode, struct page *page); +int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page); +int f2fs_convert_inline_inode(struct inode *inode); +int f2fs_write_inline_data(struct inode *inode, struct page *page); +bool recover_inline_data(struct inode *inode, struct page *npage); +struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, + struct fscrypt_name *fname, struct page **res_page); +int make_empty_inline_dir(struct inode *inode, struct inode *parent, + struct page *ipage); +int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, + const struct qstr *orig_name, + struct inode *inode, nid_t ino, umode_t mode); +void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, + struct inode *dir, struct inode *inode); +bool f2fs_empty_inline_dir(struct inode *dir); +int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, + struct fscrypt_str *fstr); +int f2fs_inline_data_fiemap(struct inode *inode, + struct fiemap_extent_info *fieinfo, + __u64 start, __u64 len); /* * shrinker.c */ -unsigned long f2fs_shrink_count(struct shrinker *, struct shrink_control *); -unsigned long f2fs_shrink_scan(struct shrinker *, struct shrink_control *); -void f2fs_join_shrinker(struct f2fs_sb_info *); -void f2fs_leave_shrinker(struct f2fs_sb_info *); +unsigned long f2fs_shrink_count(struct shrinker *shrink, + struct shrink_control *sc); +unsigned long f2fs_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc); +void f2fs_join_shrinker(struct f2fs_sb_info *sbi); +void f2fs_leave_shrinker(struct f2fs_sb_info *sbi); /* * extent_cache.c */ -unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *, int); -bool f2fs_init_extent_tree(struct inode *, struct f2fs_extent *); -void f2fs_drop_extent_tree(struct inode *); -unsigned int f2fs_destroy_extent_node(struct inode *); -void f2fs_destroy_extent_tree(struct inode *); -bool f2fs_lookup_extent_cache(struct inode *, pgoff_t, struct extent_info *); -void f2fs_update_extent_cache(struct dnode_of_data *); +struct rb_entry *__lookup_rb_tree(struct rb_root *root, + struct rb_entry *cached_re, unsigned int ofs); +struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, + struct rb_root *root, struct rb_node **parent, + unsigned int ofs); +struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root, + struct rb_entry *cached_re, unsigned int ofs, + struct rb_entry **prev_entry, struct rb_entry **next_entry, + struct rb_node ***insert_p, struct rb_node **insert_parent, + bool force); +bool __check_rb_tree_consistence(struct f2fs_sb_info *sbi, + struct rb_root *root); +unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink); +bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext); +void f2fs_drop_extent_tree(struct inode *inode); +unsigned int f2fs_destroy_extent_node(struct inode *inode); +void f2fs_destroy_extent_tree(struct inode *inode); +bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei); +void f2fs_update_extent_cache(struct dnode_of_data *dn); void f2fs_update_extent_cache_range(struct dnode_of_data *dn, - pgoff_t, block_t, unsigned int); -void init_extent_cache_info(struct f2fs_sb_info *); + pgoff_t fofs, block_t blkaddr, unsigned int len); +void init_extent_cache_info(struct f2fs_sb_info *sbi); int __init create_extent_cache(void); void destroy_extent_cache(void); +/* + * sysfs.c + */ +int __init f2fs_init_sysfs(void); +void f2fs_exit_sysfs(void); +int f2fs_register_sysfs(struct f2fs_sb_info *sbi); +void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi); + /* * crypto support */ @@ -2409,6 +2968,11 @@ static inline bool f2fs_encrypted_inode(struct inode *inode) return file_is_encrypt(inode); } +static inline bool f2fs_encrypted_file(struct inode *inode) +{ + return f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode); +} + static inline void f2fs_set_encrypted_inode(struct inode *inode) { #ifdef CONFIG_F2FS_FS_ENCRYPTION @@ -2426,9 +2990,45 @@ static inline int f2fs_sb_has_crypto(struct super_block *sb) return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_ENCRYPT); } -static inline int f2fs_sb_mounted_hmsmr(struct super_block *sb) +static inline int f2fs_sb_mounted_blkzoned(struct super_block *sb) { - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_HMSMR); + return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_BLKZONED); +} + +static inline int f2fs_sb_has_extra_attr(struct super_block *sb) +{ + return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_EXTRA_ATTR); +} + +static inline int f2fs_sb_has_project_quota(struct super_block *sb) +{ + return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_PRJQUOTA); +} + +static inline int f2fs_sb_has_inode_chksum(struct super_block *sb) +{ + return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_INODE_CHKSUM); +} + +#ifdef CONFIG_BLK_DEV_ZONED +static inline int get_blkz_type(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t blkaddr) +{ + unsigned int zno = blkaddr >> sbi->log_blocks_per_blkz; + int i; + + for (i = 0; i < sbi->s_ndevs; i++) + if (FDEV(i).bdev == bdev) + return FDEV(i).blkz_type[zno]; + return -EINVAL; +} +#endif + +static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi) +{ + struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev); + + return blk_queue_discard(q) || f2fs_sb_mounted_blkzoned(sbi->sb); } static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt) @@ -2457,28 +3057,4 @@ static inline bool f2fs_may_encrypt(struct inode *inode) #endif } -#ifndef CONFIG_F2FS_FS_ENCRYPTION -#define fscrypt_set_d_op(i) -#define fscrypt_get_ctx fscrypt_notsupp_get_ctx -#define fscrypt_release_ctx fscrypt_notsupp_release_ctx -#define fscrypt_encrypt_page fscrypt_notsupp_encrypt_page -#define fscrypt_decrypt_page fscrypt_notsupp_decrypt_page -#define fscrypt_decrypt_bio_pages fscrypt_notsupp_decrypt_bio_pages -#define fscrypt_pullback_bio_page fscrypt_notsupp_pullback_bio_page -#define fscrypt_restore_control_page fscrypt_notsupp_restore_control_page -#define fscrypt_zeroout_range fscrypt_notsupp_zeroout_range -#define fscrypt_process_policy fscrypt_notsupp_process_policy -#define fscrypt_get_policy fscrypt_notsupp_get_policy -#define fscrypt_has_permitted_context fscrypt_notsupp_has_permitted_context -#define fscrypt_inherit_context fscrypt_notsupp_inherit_context -#define fscrypt_get_encryption_info fscrypt_notsupp_get_encryption_info -#define fscrypt_put_encryption_info fscrypt_notsupp_put_encryption_info -#define fscrypt_setup_filename fscrypt_notsupp_setup_filename -#define fscrypt_free_filename fscrypt_notsupp_free_filename -#define fscrypt_fname_encrypted_size fscrypt_notsupp_fname_encrypted_size -#define fscrypt_fname_alloc_buffer fscrypt_notsupp_fname_alloc_buffer -#define fscrypt_fname_free_buffer fscrypt_notsupp_fname_free_buffer -#define fscrypt_fname_disk_to_usr fscrypt_notsupp_fname_disk_to_usr -#define fscrypt_fname_usr_to_disk fscrypt_notsupp_fname_usr_to_disk -#endif #endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 801111e1f8ef..658291ca4c01 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -32,6 +33,19 @@ #include "trace.h" #include +static int f2fs_filemap_fault(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + struct inode *inode = file_inode(vma->vm_file); + int err; + + down_read(&F2FS_I(inode)->i_mmap_sem); + err = filemap_fault(vma, vmf); + up_read(&F2FS_I(inode)->i_mmap_sem); + + return err; +} + static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { @@ -59,13 +73,14 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, f2fs_balance_fs(sbi, dn.node_changed); file_update_time(vma->vm_file); + down_read(&F2FS_I(inode)->i_mmap_sem); lock_page(page); if (unlikely(page->mapping != inode->i_mapping || page_offset(page) > i_size_read(inode) || !PageUptodate(page))) { unlock_page(page); err = -EFAULT; - goto out; + goto out_sem; } /* @@ -85,17 +100,19 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, if (!PageUptodate(page)) SetPageUptodate(page); + f2fs_update_iostat(sbi, APP_MAPPED_IO, F2FS_BLKSIZE); + trace_f2fs_vm_page_mkwrite(page, DATA); mapped: /* fill the page */ f2fs_wait_on_page_writeback(page, DATA, false); /* wait for GCed encrypted page writeback */ - if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) - f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr); + if (f2fs_encrypted_file(inode)) + f2fs_wait_on_block_writeback(sbi, dn.data_blkaddr); - /* if gced page is attached, don't write to cold segment */ - clear_cold_data(page); +out_sem: + up_read(&F2FS_I(inode)->i_mmap_sem); out: sb_end_pagefault(inode->i_sb); f2fs_update_time(sbi, REQ_TIME); @@ -103,7 +120,7 @@ out: } static const struct vm_operations_struct f2fs_file_vm_ops = { - .fault = filemap_fault, + .fault = f2fs_filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = f2fs_vm_page_mkwrite, }; @@ -118,11 +135,6 @@ static int get_parent_ino(struct inode *inode, nid_t *pino) if (!dentry) return 0; - if (update_dent_inode(inode, inode, &dentry->d_name)) { - dput(dentry); - return 0; - } - *pino = parent_ino(dentry); dput(dentry); return 1; @@ -143,8 +155,6 @@ static inline bool need_do_checkpoint(struct inode *inode) need_cp = true; else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) need_cp = true; - else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) - need_cp = true; else if (test_opt(sbi, FASTBOOT)) need_cp = true; else if (sbi->active_logs == 2) @@ -170,7 +180,6 @@ static void try_to_fix_pino(struct inode *inode) nid_t pino; down_write(&fi->i_sem); - fi->xattr_ver = 0; if (file_wrong_pino(inode) && inode->i_nlink == 1 && get_parent_ino(inode, &pino)) { f2fs_i_pino_write(inode, pino); @@ -210,7 +219,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, } /* if the inode is dirty, let's recover all the time */ - if (!datasync && !f2fs_skip_inode_update(inode)) { + if (!f2fs_skip_inode_update(inode, datasync)) { f2fs_write_inode(inode, NULL); goto go_write; } @@ -264,14 +273,24 @@ sync_nodes: } if (need_inode_block_update(sbi, ino)) { - f2fs_mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode, true); f2fs_write_inode(inode, NULL); goto sync_nodes; } - ret = wait_on_node_pages_writeback(sbi, ino); - if (ret) - goto out; + /* + * If it's atomic_write, it's just fine to keep write ordering. So + * here we don't need to wait for node write completion, since we use + * node chain which serializes node blocks. If one of node writes are + * reordered, we can see simply broken chain, resulting in stopping + * roll-forward recovery. It means we'll recover all or none node blocks + * given fsync mark. + */ + if (!atomic) { + ret = wait_on_node_pages_writeback(sbi, ino); + if (ret) + goto out; + } /* once recovery info is written, don't need to tack this */ remove_ino_entry(sbi, ino, APPEND_INO); @@ -279,7 +298,8 @@ sync_nodes: flush_out: remove_ino_entry(sbi, ino, UPDATE_INO); clear_inode_flag(inode, FI_UPDATE_WRITE); - ret = f2fs_issue_flush(sbi); + if (!atomic) + ret = f2fs_issue_flush(sbi); f2fs_update_time(sbi, REQ_TIME); out: trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); @@ -376,7 +396,8 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) dn.ofs_in_node++, pgofs++, data_ofs = (loff_t)pgofs << PAGE_SHIFT) { block_t blkaddr; - blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + blkaddr = datablock_addr(dn.inode, + dn.node_page, dn.ofs_in_node); if (__found_offset(blkaddr, dirty, pgofs, whence)) { f2fs_put_dnode(&dn); @@ -424,14 +445,6 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) struct inode *inode = file_inode(file); int err; - if (f2fs_encrypted_inode(inode)) { - err = fscrypt_get_encryption_info(inode); - if (err) - return 0; - if (!f2fs_encrypted_inode(inode)) - return -ENOKEY; - } - /* we don't need to use inline_data strictly */ err = f2fs_convert_inline_inode(inode); if (err) @@ -444,11 +457,10 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) static int f2fs_file_open(struct inode *inode, struct file *filp) { - int ret = generic_file_open(inode, filp); struct dentry *dir; - if (!ret && f2fs_encrypted_inode(inode)) { - ret = fscrypt_get_encryption_info(inode); + if (f2fs_encrypted_inode(inode)) { + int ret = fscrypt_get_encryption_info(inode); if (ret) return -EACCES; if (!fscrypt_has_encryption_key(inode)) @@ -461,7 +473,7 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) return -EPERM; } dput(dir); - return ret; + return dquot_file_open(inode, filp); } int truncate_data_blocks_range(struct dnode_of_data *dn, int count) @@ -470,9 +482,13 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) struct f2fs_node *raw_node; int nr_free = 0, ofs = dn->ofs_in_node, len = count; __le32 *addr; + int base = 0; + + if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode)) + base = get_extra_isize(dn->inode); raw_node = F2FS_NODE(dn->node_page); - addr = blkaddr_in_node(raw_node) + ofs; + addr = blkaddr_in_node(raw_node) + base + ofs; for (; count > 0; count--, addr++, dn->ofs_in_node++) { block_t blkaddr = le32_to_cpu(*addr); @@ -532,12 +548,14 @@ static int truncate_partial_data_page(struct inode *inode, u64 from, page = get_lock_data_page(inode, index, true); if (IS_ERR(page)) - return 0; + return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page); truncate_out: f2fs_wait_on_page_writeback(page, DATA, true); zero_user(page, offset, PAGE_SIZE - offset); - if (!cache_only || !f2fs_encrypted_inode(inode) || - !S_ISREG(inode->i_mode)) + + /* An encrypted inode should have a key and truncate the last page. */ + f2fs_bug_on(F2FS_I_SB(inode), cache_only && f2fs_encrypted_inode(inode)); + if (!cache_only) set_page_dirty(page); f2fs_put_page(page, 1); return 0; @@ -570,8 +588,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) } if (f2fs_has_inline_data(inode)) { - if (truncate_inline_inode(ipage, from)) - set_page_dirty(ipage); + truncate_inline_inode(inode, ipage, from); f2fs_put_page(ipage, 1); truncate_page = true; goto out; @@ -620,6 +637,12 @@ int f2fs_truncate(struct inode *inode) trace_f2fs_truncate(inode); +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) { + f2fs_show_injection_info(FAULT_TRUNCATE); + return -EIO; + } +#endif /* we should check inline_data size */ if (!f2fs_may_inline_data(inode)) { err = f2fs_convert_inline_inode(inode); @@ -632,16 +655,20 @@ int f2fs_truncate(struct inode *inode) return err; inode->i_mtime = inode->i_ctime = current_time(inode); - f2fs_mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode, false); return 0; } int f2fs_getattr(struct vfsmount *mnt, - struct dentry *dentry, struct kstat *stat) + struct dentry *dentry, struct kstat *stat) { struct inode *inode = d_inode(dentry); generic_fillattr(inode, stat); - stat->blocks <<= 3; + + /* we need to show initial sectors used for inline_data/dentries */ + if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) + stat->blocks += (stat->size + 511) >> 9; + return 0; } @@ -679,28 +706,50 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); int err; + bool size_changed = false; err = setattr_prepare(dentry, attr); if (err) return err; - if (attr->ia_valid & ATTR_SIZE) { - if (f2fs_encrypted_inode(inode) && - fscrypt_get_encryption_info(inode)) - return -EACCES; + if (is_quota_modification(inode, attr)) { + err = dquot_initialize(inode); + if (err) + return err; + } + if ((attr->ia_valid & ATTR_UID && + !uid_eq(attr->ia_uid, inode->i_uid)) || + (attr->ia_valid & ATTR_GID && + !gid_eq(attr->ia_gid, inode->i_gid))) { + err = dquot_transfer(inode, attr); + if (err) + return err; + } - if (attr->ia_size <= i_size_read(inode)) { - truncate_setsize(inode, attr->ia_size); - err = f2fs_truncate(inode); + if (attr->ia_valid & ATTR_SIZE) { + if (f2fs_encrypted_inode(inode)) { + err = fscrypt_get_encryption_info(inode); + if (err) + return err; + if (!fscrypt_has_encryption_key(inode)) + return -ENOKEY; + } + + if (attr->ia_size <= i_size_read(inode)) { + down_write(&F2FS_I(inode)->i_mmap_sem); + truncate_setsize(inode, attr->ia_size); + err = f2fs_truncate(inode); + up_write(&F2FS_I(inode)->i_mmap_sem); if (err) return err; - f2fs_balance_fs(F2FS_I_SB(inode), true); } else { /* * do not trim all blocks after i_size if target size is * larger than i_size. */ + down_write(&F2FS_I(inode)->i_mmap_sem); truncate_setsize(inode, attr->ia_size); + up_write(&F2FS_I(inode)->i_mmap_sem); /* should convert inline inode here */ if (!f2fs_may_inline_data(inode)) { @@ -710,6 +759,8 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) } inode->i_mtime = inode->i_ctime = current_time(inode); } + + size_changed = true; } __setattr_copy(inode, attr); @@ -722,7 +773,12 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) } } - f2fs_mark_inode_dirty_sync(inode); + /* file size may changed here */ + f2fs_mark_inode_dirty_sync(inode, size_changed); + + /* inode change will produce dirty node pages flushed by checkpoint */ + f2fs_balance_fs(F2FS_I_SB(inode), true); + return err; } @@ -836,12 +892,14 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) blk_start = (loff_t)pg_start << PAGE_SHIFT; blk_end = (loff_t)pg_end << PAGE_SHIFT; + down_write(&F2FS_I(inode)->i_mmap_sem); truncate_inode_pages_range(mapping, blk_start, blk_end - 1); f2fs_lock_op(sbi); ret = truncate_hole(inode, pg_start, pg_end); f2fs_unlock_op(sbi); + up_write(&F2FS_I(inode)->i_mmap_sem); } } @@ -872,7 +930,8 @@ next_dnode: done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, inode) - dn.ofs_in_node, len); for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) { - *blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + *blkaddr = datablock_addr(dn.inode, + dn.node_page, dn.ofs_in_node); if (!is_checkpointed_data(sbi, *blkaddr)) { if (test_opt(sbi, LFS)) { @@ -948,15 +1007,15 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, ADDRS_PER_PAGE(dn.node_page, dst_inode) - dn.ofs_in_node, len - i); do { - dn.data_blkaddr = datablock_addr(dn.node_page, - dn.ofs_in_node); + dn.data_blkaddr = datablock_addr(dn.inode, + dn.node_page, dn.ofs_in_node); truncate_data_blocks_range(&dn, 1); if (do_replace[i]) { f2fs_i_blocks_write(src_inode, - 1, false); + 1, false, false); f2fs_i_blocks_write(dst_inode, - 1, true); + 1, true, false); f2fs_replace_block(sbi, &dn, dn.data_blkaddr, blkaddr[i], ni.version, true, false); @@ -1008,11 +1067,11 @@ static int __exchange_data_block(struct inode *src_inode, while (len) { olen = min((pgoff_t)4 * ADDRS_PER_BLOCK, len); - src_blkaddr = f2fs_kvzalloc(sizeof(block_t) * olen, GFP_KERNEL); + src_blkaddr = kvzalloc(sizeof(block_t) * olen, GFP_KERNEL); if (!src_blkaddr) return -ENOMEM; - do_replace = f2fs_kvzalloc(sizeof(int) * olen, GFP_KERNEL); + do_replace = kvzalloc(sizeof(int) * olen, GFP_KERNEL); if (!do_replace) { kvfree(src_blkaddr); return -ENOMEM; @@ -1080,16 +1139,17 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) pg_start = offset >> PAGE_SHIFT; pg_end = (offset + len) >> PAGE_SHIFT; + down_write(&F2FS_I(inode)->i_mmap_sem); /* write out all dirty pages from offset */ ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); if (ret) - return ret; + goto out; truncate_pagecache(inode, offset); ret = f2fs_do_collapse(inode, pg_start, pg_end); if (ret) - return ret; + goto out; /* write out all moved pages, if possible */ filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); @@ -1102,6 +1162,8 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) if (!ret) f2fs_i_size_write(inode, new_size); +out: + up_write(&F2FS_I(inode)->i_mmap_sem); return ret; } @@ -1115,7 +1177,8 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, int ret; for (; index < end; index++, dn->ofs_in_node++) { - if (datablock_addr(dn->node_page, dn->ofs_in_node) == NULL_ADDR) + if (datablock_addr(dn->inode, dn->node_page, + dn->ofs_in_node) == NULL_ADDR) count++; } @@ -1126,8 +1189,8 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, dn->ofs_in_node = ofs_in_node; for (index = start; index < end; index++, dn->ofs_in_node++) { - dn->data_blkaddr = - datablock_addr(dn->node_page, dn->ofs_in_node); + dn->data_blkaddr = datablock_addr(dn->inode, + dn->node_page, dn->ofs_in_node); /* * reserve_new_blocks will not guarantee entire block * allocation. @@ -1166,9 +1229,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, if (ret) return ret; + down_write(&F2FS_I(inode)->i_mmap_sem); ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1); if (ret) - return ret; + goto out_sem; truncate_pagecache_range(inode, offset, offset + len - 1); @@ -1182,17 +1246,15 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, ret = fill_zero(inode, pg_start, off_start, off_end - off_start); if (ret) - return ret; + goto out_sem; - if (offset + len > new_size) - new_size = offset + len; new_size = max_t(loff_t, new_size, offset + len); } else { if (off_start) { ret = fill_zero(inode, pg_start++, off_start, PAGE_SIZE - off_start); if (ret) - return ret; + goto out_sem; new_size = max_t(loff_t, new_size, (loff_t)pg_start << PAGE_SHIFT); @@ -1218,6 +1280,9 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, ret = f2fs_do_zero_range(&dn, index, end); f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); + + f2fs_balance_fs(sbi, dn.node_changed); + if (ret) goto out; @@ -1238,6 +1303,8 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, out: if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) f2fs_i_size_write(inode, new_size); +out_sem: + up_write(&F2FS_I(inode)->i_mmap_sem); return ret; } @@ -1250,8 +1317,9 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) int ret = 0; new_size = i_size_read(inode) + len; - if (new_size > inode->i_sb->s_maxbytes) - return -EFBIG; + ret = inode_newsize_ok(inode, new_size); + if (ret) + return ret; if (offset >= i_size_read(inode)) return -EINVAL; @@ -1266,14 +1334,15 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) f2fs_balance_fs(sbi, true); + down_write(&F2FS_I(inode)->i_mmap_sem); ret = truncate_blocks(inode, i_size_read(inode), true); if (ret) - return ret; + goto out; /* write out all dirty pages from offset */ ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); if (ret) - return ret; + goto out; truncate_pagecache(inode, offset); @@ -1302,6 +1371,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) if (!ret) f2fs_i_size_write(inode, new_size); +out: + up_write(&F2FS_I(inode)->i_mmap_sem); return ret; } @@ -1313,15 +1384,15 @@ static int expand_inode_data(struct inode *inode, loff_t offset, pgoff_t pg_end; loff_t new_size = i_size_read(inode); loff_t off_end; - int ret; + int err; - ret = inode_newsize_ok(inode, (len + offset)); - if (ret) - return ret; + err = inode_newsize_ok(inode, (len + offset)); + if (err) + return err; - ret = f2fs_convert_inline_inode(inode); - if (ret) - return ret; + err = f2fs_convert_inline_inode(inode); + if (err) + return err; f2fs_balance_fs(sbi, true); @@ -1333,12 +1404,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (off_end) map.m_len++; - ret = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO); - if (ret) { + err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO); + if (err) { pgoff_t last_off; if (!map.m_len) - return ret; + return err; last_off = map.m_lblk + map.m_len - 1; @@ -1352,7 +1423,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) f2fs_i_size_write(inode, new_size); - return ret; + return err; } static long f2fs_fallocate(struct file *file, int mode, @@ -1393,7 +1464,9 @@ static long f2fs_fallocate(struct file *file, int mode, if (!ret) { inode->i_mtime = inode->i_ctime = current_time(inode); - f2fs_mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode, false); + if (mode & FALLOC_FL_KEEP_SIZE) + file_set_keep_isize(inode); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); } @@ -1419,6 +1492,7 @@ static int f2fs_release_file(struct inode *inode, struct file *filp) drop_inmem_pages(inode); if (f2fs_is_volatile_file(inode)) { clear_inode_flag(inode, FI_VOLATILE_FILE); + stat_dec_volatile_write(inode); set_inode_flag(inode, FI_DROP_CACHE); filemap_fdatawrite(inode->i_mapping); clear_inode_flag(inode, FI_DROP_CACHE); @@ -1426,17 +1500,20 @@ static int f2fs_release_file(struct inode *inode, struct file *filp) return 0; } -#define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL)) -#define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL) - -static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) +static int f2fs_file_flush(struct file *file, fl_owner_t id) { - if (S_ISDIR(mode)) - return flags; - else if (S_ISREG(mode)) - return flags & F2FS_REG_FLMASK; - else - return flags & F2FS_OTHER_FLMASK; + struct inode *inode = file_inode(file); + + /* + * If the process doing a transaction is crashed, we should do + * roll-back. Otherwise, other reader/write can see corrupted database + * until all the writers close its file. Since this should be done + * before dropping file lock, it needs to do in ->flush. + */ + if (f2fs_is_atomic_file(inode) && + F2FS_I(inode)->inmem_task == current) + drop_inmem_pages(inode); + return 0; } static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) @@ -1465,28 +1542,34 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) if (ret) return ret; - flags = f2fs_mask_flags(inode->i_mode, flags); - inode_lock(inode); + /* Is it quota file? Do not allow user to mess with it */ + if (IS_NOQUOTA(inode)) { + ret = -EPERM; + goto unlock_out; + } + + flags = f2fs_mask_flags(inode->i_mode, flags); + oldflags = fi->i_flags; if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { if (!capable(CAP_LINUX_IMMUTABLE)) { - inode_unlock(inode); ret = -EPERM; - goto out; + goto unlock_out; } } flags = flags & FS_FL_USER_MODIFIABLE; flags |= oldflags & ~FS_FL_USER_MODIFIABLE; fi->i_flags = flags; - inode_unlock(inode); inode->i_ctime = current_time(inode); f2fs_set_inode_flags(inode); -out: + f2fs_mark_inode_dirty_sync(inode, false); +unlock_out: + inode_unlock(inode); mnt_drop_write_file(filp); return ret; } @@ -1506,6 +1589,9 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) if (!inode_owner_or_capable(inode)) return -EACCES; + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + ret = mnt_want_write_file(filp); if (ret) return ret; @@ -1520,17 +1606,26 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) goto out; set_inode_flag(inode, FI_ATOMIC_FILE); + set_inode_flag(inode, FI_HOT_DATA); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); if (!get_dirty_pages(inode)) - goto out; + goto inc_stat; f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, "Unexpected flush for atomic writes: ino=%lu, npages=%u", inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); - if (ret) + if (ret) { clear_inode_flag(inode, FI_ATOMIC_FILE); + clear_inode_flag(inode, FI_HOT_DATA); + goto out; + } + +inc_stat: + F2FS_I(inode)->inmem_task = current; + stat_inc_atomic_write(inode); + stat_update_max_atomic_write(inode); out: inode_unlock(inode); mnt_drop_write_file(filp); @@ -1555,15 +1650,19 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) goto err_out; if (f2fs_is_atomic_file(inode)) { - clear_inode_flag(inode, FI_ATOMIC_FILE); ret = commit_inmem_pages(inode); - if (ret) { - set_inode_flag(inode, FI_ATOMIC_FILE); + if (ret) goto err_out; - } - } - ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); + ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); + if (!ret) { + clear_inode_flag(inode, FI_ATOMIC_FILE); + clear_inode_flag(inode, FI_HOT_DATA); + stat_dec_atomic_write(inode); + } + } else { + ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false); + } err_out: inode_unlock(inode); mnt_drop_write_file(filp); @@ -1578,6 +1677,9 @@ static int f2fs_ioc_start_volatile_write(struct file *filp) if (!inode_owner_or_capable(inode)) return -EACCES; + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + ret = mnt_want_write_file(filp); if (ret) return ret; @@ -1591,6 +1693,9 @@ static int f2fs_ioc_start_volatile_write(struct file *filp) if (ret) goto out; + stat_inc_volatile_write(inode); + stat_update_max_volatile_write(inode); + set_inode_flag(inode, FI_VOLATILE_FILE); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); out: @@ -1646,6 +1751,7 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) drop_inmem_pages(inode); if (f2fs_is_volatile_file(inode)) { clear_inode_flag(inode, FI_VOLATILE_FILE); + stat_dec_volatile_write(inode); ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); } @@ -1691,7 +1797,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) f2fs_stop_checkpoint(sbi, false); break; case F2FS_GOING_DOWN_METAFLUSH: - sync_meta_pages(sbi, META, LONG_MAX); + sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO); f2fs_stop_checkpoint(sbi, false); break; default: @@ -1752,31 +1858,16 @@ static bool uuid_is_nonzero(__u8 u[16]) static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) { - struct fscrypt_policy policy; struct inode *inode = file_inode(filp); - if (copy_from_user(&policy, (struct fscrypt_policy __user *)arg, - sizeof(policy))) - return -EFAULT; - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); - return fscrypt_process_policy(filp, &policy); + return fscrypt_ioctl_set_policy(filp, (const void __user *)arg); } static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) { - struct fscrypt_policy policy; - struct inode *inode = file_inode(filp); - int err; - - err = fscrypt_get_policy(inode, &policy); - if (err) - return err; - - if (copy_to_user((struct fscrypt_policy __user *)arg, &policy, sizeof(policy))) - return -EFAULT; - return 0; + return fscrypt_ioctl_get_policy(filp, (void __user *)arg); } static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) @@ -1842,7 +1933,51 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg) mutex_lock(&sbi->gc_mutex); } - ret = f2fs_gc(sbi, sync); + ret = f2fs_gc(sbi, sync, true, NULL_SEGNO); +out: + mnt_drop_write_file(filp); + return ret; +} + +static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_gc_range range; + u64 end; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg, + sizeof(range))) + return -EFAULT; + + if (f2fs_readonly(sbi->sb)) + return -EROFS; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + end = range.start + range.len; + if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) + return -EINVAL; +do_more: + if (!range.sync) { + if (!mutex_trylock(&sbi->gc_mutex)) { + ret = -EBUSY; + goto out; + } + } else { + mutex_lock(&sbi->gc_mutex); + } + + ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start)); + range.start += sbi->blocks_per_seg; + if (range.start <= end) + goto do_more; out: mnt_drop_write_file(filp); return ret; @@ -1876,17 +2011,16 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, { struct inode *inode = file_inode(filp); struct f2fs_map_blocks map = { .m_next_pgofs = NULL }; - struct extent_info ei; + struct extent_info ei = {0,0,0}; pgoff_t pg_start, pg_end; unsigned int blk_per_seg = sbi->blocks_per_seg; unsigned int total = 0, sec_num; - unsigned int pages_per_sec = sbi->segs_per_sec * blk_per_seg; block_t blk_end = 0; bool fragmented = false; int err; /* if in-place-update policy is enabled, don't waste time here */ - if (need_inplace_update(inode)) + if (need_inplace_update_policy(inode, NULL)) return -EINVAL; pg_start = range->start >> PAGE_SHIFT; @@ -1920,7 +2054,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, */ while (map.m_lblk < pg_end) { map.m_len = pg_end - map.m_lblk; - err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ); + err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT); if (err) goto out; @@ -1944,7 +2078,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, map.m_lblk = pg_start; map.m_len = pg_end - pg_start; - sec_num = (map.m_len + pages_per_sec - 1) / pages_per_sec; + sec_num = (map.m_len + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi); /* * make sure there are enough free section for LFS allocation, this can @@ -1962,7 +2096,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, do_map: map.m_len = pg_end - map.m_lblk; - err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ); + err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT); if (err) goto clear_out; @@ -2021,42 +2155,40 @@ static int f2fs_ioc_defragment(struct file *filp, unsigned long arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!S_ISREG(inode->i_mode)) + if (!S_ISREG(inode->i_mode) || f2fs_is_atomic_file(inode)) + return -EINVAL; + + if (f2fs_readonly(sbi->sb)) + return -EROFS; + + if (copy_from_user(&range, (struct f2fs_defragment __user *)arg, + sizeof(range))) + return -EFAULT; + + /* verify alignment of offset & size */ + if (range.start & (F2FS_BLKSIZE - 1) || range.len & (F2FS_BLKSIZE - 1)) + return -EINVAL; + + if (unlikely((range.start + range.len) >> PAGE_SHIFT > + sbi->max_file_blocks)) return -EINVAL; err = mnt_want_write_file(filp); if (err) return err; - if (f2fs_readonly(sbi->sb)) { - err = -EROFS; - goto out; - } - - if (copy_from_user(&range, (struct f2fs_defragment __user *)arg, - sizeof(range))) { - err = -EFAULT; - goto out; - } - - /* verify alignment of offset & size */ - if (range.start & (F2FS_BLKSIZE - 1) || - range.len & (F2FS_BLKSIZE - 1)) { - err = -EINVAL; - goto out; - } - err = f2fs_defragment_range(sbi, filp, &range); + mnt_drop_write_file(filp); + f2fs_update_time(sbi, REQ_TIME); if (err < 0) - goto out; + return err; if (copy_to_user((struct f2fs_defragment __user *)arg, &range, sizeof(range))) - err = -EFAULT; -out: - mnt_drop_write_file(filp); - return err; + return -EFAULT; + + return 0; } static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, @@ -2190,6 +2322,8 @@ static int f2fs_ioc_move_range(struct file *filp, unsigned long arg) range.pos_out, range.len); mnt_drop_write_file(filp); + if (err) + goto err_out; if (copy_to_user((struct f2fs_move_range __user *)arg, &range, sizeof(range))) @@ -2199,6 +2333,79 @@ err_out: return err; } +static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct sit_info *sm = SIT_I(sbi); + unsigned int start_segno = 0, end_segno = 0; + unsigned int dev_start_segno = 0, dev_end_segno = 0; + struct f2fs_flush_device range; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (f2fs_readonly(sbi->sb)) + return -EROFS; + + if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg, + sizeof(range))) + return -EFAULT; + + if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num || + sbi->segs_per_sec != 1) { + f2fs_msg(sbi->sb, KERN_WARNING, + "Can't flush %u in %d for segs_per_sec %u != 1\n", + range.dev_num, sbi->s_ndevs, + sbi->segs_per_sec); + return -EINVAL; + } + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + if (range.dev_num != 0) + dev_start_segno = GET_SEGNO(sbi, FDEV(range.dev_num).start_blk); + dev_end_segno = GET_SEGNO(sbi, FDEV(range.dev_num).end_blk); + + start_segno = sm->last_victim[FLUSH_DEVICE]; + if (start_segno < dev_start_segno || start_segno >= dev_end_segno) + start_segno = dev_start_segno; + end_segno = min(start_segno + range.segments, dev_end_segno); + + while (start_segno < end_segno) { + if (!mutex_trylock(&sbi->gc_mutex)) { + ret = -EBUSY; + goto out; + } + sm->last_victim[GC_CB] = end_segno + 1; + sm->last_victim[GC_GREEDY] = end_segno + 1; + sm->last_victim[ALLOC_NEXT] = end_segno + 1; + ret = f2fs_gc(sbi, true, true, start_segno); + if (ret == -EAGAIN) + ret = 0; + else if (ret < 0) + break; + start_segno++; + } +out: + mnt_drop_write_file(filp); + return ret; +} + +static int f2fs_ioc_get_features(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + u32 sb_feature = le32_to_cpu(F2FS_I_SB(inode)->raw_super->feature); + + /* Must validate to set it with SQLite behavior in Android. */ + sb_feature |= F2FS_FEATURE_ATOMIC_WRITE; + + return put_user(sb_feature, (u32 __user *)arg); +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { @@ -2230,12 +2437,18 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_get_encryption_pwsalt(filp, arg); case F2FS_IOC_GARBAGE_COLLECT: return f2fs_ioc_gc(filp, arg); + case F2FS_IOC_GARBAGE_COLLECT_RANGE: + return f2fs_ioc_gc_range(filp, arg); case F2FS_IOC_WRITE_CHECKPOINT: return f2fs_ioc_write_checkpoint(filp, arg); case F2FS_IOC_DEFRAGMENT: return f2fs_ioc_defragment(filp, arg); case F2FS_IOC_MOVE_RANGE: return f2fs_ioc_move_range(filp, arg); + case F2FS_IOC_FLUSH_DEVICE: + return f2fs_ioc_flush_device(filp, arg); + case F2FS_IOC_GET_FEATURES: + return f2fs_ioc_get_features(filp, arg); default: return -ENOTTY; } @@ -2248,20 +2461,26 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct blk_plug plug; ssize_t ret; - if (f2fs_encrypted_inode(inode) && - !fscrypt_has_encryption_key(inode) && - fscrypt_get_encryption_info(inode)) - return -EACCES; - inode_lock(inode); ret = generic_write_checks(iocb, from); if (ret > 0) { - ret = f2fs_preallocate_blocks(iocb, from); - if (!ret) { - blk_start_plug(&plug); - ret = __generic_file_write_iter(iocb, from); - blk_finish_plug(&plug); + int err; + + if (iov_iter_fault_in_readable(from, iov_iter_count(from))) + set_inode_flag(inode, FI_NO_PREALLOC); + + err = f2fs_preallocate_blocks(iocb, from); + if (err) { + inode_unlock(inode); + return err; } + blk_start_plug(&plug); + ret = __generic_file_write_iter(iocb, from); + blk_finish_plug(&plug); + clear_inode_flag(inode, FI_NO_PREALLOC); + + if (ret > 0) + f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret); } inode_unlock(inode); @@ -2293,10 +2512,12 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_GET_ENCRYPTION_PWSALT: case F2FS_IOC_GET_ENCRYPTION_POLICY: case F2FS_IOC_GARBAGE_COLLECT: + case F2FS_IOC_GARBAGE_COLLECT_RANGE: case F2FS_IOC_WRITE_CHECKPOINT: case F2FS_IOC_DEFRAGMENT: - break; case F2FS_IOC_MOVE_RANGE: + case F2FS_IOC_FLUSH_DEVICE: + case F2FS_IOC_GET_FEATURES: break; default: return -ENOIOCTLCMD; @@ -2312,6 +2533,7 @@ const struct file_operations f2fs_file_operations = { .open = f2fs_file_open, .release = f2fs_release_file, .mmap = f2fs_file_mmap, + .flush = f2fs_file_flush, .fsync = f2fs_sync_file, .fallocate = f2fs_fallocate, .unlocked_ioctl = f2fs_ioctl, diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 34a69e7ed90b..bfe6a8ccc3a0 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -28,17 +28,23 @@ static int gc_thread_func(void *data) struct f2fs_sb_info *sbi = data; struct f2fs_gc_kthread *gc_th = sbi->gc_thread; wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head; - long wait_ms; + unsigned int wait_ms; wait_ms = gc_th->min_sleep_time; + set_freezable(); do { + wait_event_interruptible_timeout(*wq, + kthread_should_stop() || freezing(current) || + gc_th->gc_wake, + msecs_to_jiffies(wait_ms)); + + /* give it a try one time */ + if (gc_th->gc_wake) + gc_th->gc_wake = 0; + if (try_to_freeze()) continue; - else - wait_event_interruptible_timeout(*wq, - kthread_should_stop(), - msecs_to_jiffies(wait_ms)); if (kthread_should_stop()) break; @@ -48,10 +54,15 @@ static int gc_thread_func(void *data) } #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(sbi, FAULT_CHECKPOINT)) + if (time_to_inject(sbi, FAULT_CHECKPOINT)) { + f2fs_show_injection_info(FAULT_CHECKPOINT); f2fs_stop_checkpoint(sbi, false); + } #endif + if (!sb_start_write_trylock(sbi->sb)) + continue; + /* * [GC triggering condition] * 0. GC is not conducted currently. @@ -66,23 +77,28 @@ static int gc_thread_func(void *data) * So, I'd like to wait some time to collect dirty segments. */ if (!mutex_trylock(&sbi->gc_mutex)) - continue; + goto next; + + if (gc_th->gc_urgent) { + wait_ms = gc_th->urgent_sleep_time; + goto do_gc; + } if (!is_idle(sbi)) { increase_sleep_time(gc_th, &wait_ms); mutex_unlock(&sbi->gc_mutex); - continue; + goto next; } if (has_enough_invalid_blocks(sbi)) decrease_sleep_time(gc_th, &wait_ms); else increase_sleep_time(gc_th, &wait_ms); - +do_gc: stat_inc_bggc_count(sbi); /* if return value is not zero, no victim was selected */ - if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC))) + if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true, NULL_SEGNO)) wait_ms = gc_th->no_gc_sleep_time; trace_f2fs_background_gc(sbi->sb, wait_ms, @@ -90,6 +106,8 @@ static int gc_thread_func(void *data) /* balancing f2fs's metadata periodically */ f2fs_balance_fs_bg(sbi); +next: + sb_end_write(sbi->sb); } while (!kthread_should_stop()); return 0; @@ -107,11 +125,14 @@ int start_gc_thread(struct f2fs_sb_info *sbi) goto out; } + gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME; gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; gc_th->gc_idle = 0; + gc_th->gc_urgent = 0; + gc_th->gc_wake= 0; sbi->gc_thread = gc_th; init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); @@ -170,7 +191,11 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, if (gc_type != FG_GC && p->max_search > sbi->max_victim_search) p->max_search = sbi->max_victim_search; - p->offset = sbi->last_victim[p->gc_mode]; + /* let's select beginning hot/small space first */ + if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) + p->offset = 0; + else + p->offset = SIT_I(sbi)->last_victim[p->gc_mode]; } static unsigned int get_max_cost(struct f2fs_sb_info *sbi, @@ -180,7 +205,7 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi, if (p->alloc_mode == SSR) return sbi->blocks_per_seg; if (p->gc_mode == GC_GREEDY) - return sbi->blocks_per_seg * p->ofs_unit; + return 2 * sbi->blocks_per_seg * p->ofs_unit; else if (p->gc_mode == GC_CB) return UINT_MAX; else /* No other gc_mode */ @@ -205,7 +230,7 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) continue; clear_bit(secno, dirty_i->victim_secmap); - return secno * sbi->segs_per_sec; + return GET_SEG_FROM_SEC(sbi, secno); } return NULL_SEGNO; } @@ -213,8 +238,8 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) { struct sit_info *sit_i = SIT_I(sbi); - unsigned int secno = GET_SECNO(sbi, segno); - unsigned int start = secno * sbi->segs_per_sec; + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + unsigned int start = GET_SEG_FROM_SEC(sbi, secno); unsigned long long mtime = 0; unsigned int vblocks; unsigned char age = 0; @@ -223,7 +248,7 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) for (i = 0; i < sbi->segs_per_sec; i++) mtime += get_seg_entry(sbi, start + i)->mtime; - vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); + vblocks = get_valid_blocks(sbi, segno, true); mtime = div_u64(mtime, sbi->segs_per_sec); vblocks = div_u64(vblocks, sbi->segs_per_sec); @@ -242,6 +267,16 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) return UINT_MAX - ((100 * (100 - u) * age) / (100 + u)); } +static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + unsigned int valid_blocks = + get_valid_blocks(sbi, segno, true); + + return IS_DATASEG(get_seg_entry(sbi, segno)->type) ? + valid_blocks * 2 : valid_blocks; +} + static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno, struct victim_sel_policy *p) { @@ -250,7 +285,7 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, /* alloc_mode == LFS */ if (p->gc_mode == GC_GREEDY) - return get_valid_blocks(sbi, segno, sbi->segs_per_sec); + return get_greedy_cost(sbi, segno); else return get_cb_cost(sbi, segno); } @@ -279,6 +314,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, unsigned int *result, int gc_type, int type, char alloc_mode) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + struct sit_info *sm = SIT_I(sbi); struct victim_sel_policy p; unsigned int secno, last_victim; unsigned int last_segment = MAIN_SEGS(sbi); @@ -292,10 +328,18 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, p.min_segno = NULL_SEGNO; p.min_cost = get_max_cost(sbi, &p); + if (*result != NULL_SEGNO) { + if (IS_DATASEG(get_seg_entry(sbi, *result)->type) && + get_valid_blocks(sbi, *result, false) && + !sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result))) + p.min_segno = *result; + goto out; + } + if (p.max_search == 0) goto out; - last_victim = sbi->last_victim[p.gc_mode]; + last_victim = sm->last_victim[p.gc_mode]; if (p.alloc_mode == LFS && gc_type == FG_GC) { p.min_segno = check_bg_victims(sbi); if (p.min_segno != NULL_SEGNO) @@ -308,9 +352,10 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, segno = find_next_bit(p.dirty_segmap, last_segment, p.offset); if (segno >= last_segment) { - if (sbi->last_victim[p.gc_mode]) { - last_segment = sbi->last_victim[p.gc_mode]; - sbi->last_victim[p.gc_mode] = 0; + if (sm->last_victim[p.gc_mode]) { + last_segment = + sm->last_victim[p.gc_mode]; + sm->last_victim[p.gc_mode] = 0; p.offset = 0; continue; } @@ -327,7 +372,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, nsearched++; } - secno = GET_SECNO(sbi, segno); + secno = GET_SEC_FROM_SEG(sbi, segno); if (sec_usage_check(sbi, secno)) goto next; @@ -345,17 +390,18 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, } next: if (nsearched >= p.max_search) { - if (!sbi->last_victim[p.gc_mode] && segno <= last_victim) - sbi->last_victim[p.gc_mode] = last_victim + 1; + if (!sm->last_victim[p.gc_mode] && segno <= last_victim) + sm->last_victim[p.gc_mode] = last_victim + 1; else - sbi->last_victim[p.gc_mode] = segno + 1; + sm->last_victim[p.gc_mode] = segno + 1; + sm->last_victim[p.gc_mode] %= MAIN_SEGS(sbi); break; } } if (p.min_segno != NULL_SEGNO) { got_it: if (p.alloc_mode == LFS) { - secno = GET_SECNO(sbi, p.min_segno); + secno = GET_SEC_FROM_SEG(sbi, p.min_segno); if (gc_type == FG_GC) sbi->cur_victim_sec = secno; else @@ -538,12 +584,14 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, get_node_info(sbi, nid, dni); if (sum->version != dni->version) { - f2fs_put_page(node_page, 1); - return false; + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: valid data with mismatched node version.", + __func__); + set_sbi_flag(sbi, SBI_NEED_FSCK); } *nofs = ofs_of_node(node_page); - source_blkaddr = datablock_addr(node_page, ofs_in_node); + source_blkaddr = datablock_addr(NULL, node_page, ofs_in_node); f2fs_put_page(node_page, 1); if (source_blkaddr != blkaddr) @@ -551,14 +599,21 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, return true; } -static void move_encrypted_block(struct inode *inode, block_t bidx) +/* + * Move data block via META_MAPPING while keeping locked data page. + * This can be used to move blocks, aka LBAs, directly on disk. + */ +static void move_data_block(struct inode *inode, block_t bidx, + unsigned int segno, int off) { struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), .type = DATA, + .temp = COLD, .op = REQ_OP_READ, - .op_flags = READ_SYNC, + .op_flags = 0, .encrypted_page = NULL, + .in_list = false, }; struct dnode_of_data dn; struct f2fs_summary sum; @@ -572,6 +627,12 @@ static void move_encrypted_block(struct inode *inode, block_t bidx) if (!page) return; + if (!check_valid_map(F2FS_I_SB(inode), segno, off)) + goto out; + + if (f2fs_is_atomic_file(inode)) + goto out; + set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE); if (err) @@ -596,7 +657,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx) fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, - &sum, CURSEG_COLD_DATA); + &sum, CURSEG_COLD_DATA, NULL, false); fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi), newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); @@ -632,9 +693,11 @@ static void move_encrypted_block(struct inode *inode, block_t bidx) f2fs_wait_on_page_writeback(dn.node_page, NODE, true); fio.op = REQ_OP_WRITE; - fio.op_flags = WRITE_SYNC; + fio.op_flags = REQ_SYNC; fio.new_blkaddr = newaddr; - f2fs_submit_page_mbio(&fio); + f2fs_submit_page_write(&fio); + + f2fs_update_iostat(fio.sbi, FS_GC_DATA_IO, F2FS_BLKSIZE); f2fs_update_data_blkaddr(&dn, newaddr); set_inode_flag(inode, FI_APPEND_WRITE); @@ -652,7 +715,8 @@ out: f2fs_put_page(page, 1); } -static void move_data_page(struct inode *inode, block_t bidx, int gc_type) +static void move_data_page(struct inode *inode, block_t bidx, int gc_type, + unsigned int segno, int off) { struct page *page; @@ -660,6 +724,12 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type) if (IS_ERR(page)) return; + if (!check_valid_map(F2FS_I_SB(inode), segno, off)) + goto out; + + if (f2fs_is_atomic_file(inode)) + goto out; + if (gc_type == BG_GC) { if (PageWriteback(page)) goto out; @@ -669,10 +739,14 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type) struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), .type = DATA, + .temp = COLD, .op = REQ_OP_WRITE, - .op_flags = WRITE_SYNC, + .op_flags = REQ_SYNC, + .old_blkaddr = NULL_ADDR, .page = page, .encrypted_page = NULL, + .need_lock = LOCK_REQ, + .io_type = FS_GC_DATA_IO, }; bool is_dirty = PageDirty(page); int err; @@ -680,8 +754,10 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type) retry: set_page_dirty(page); f2fs_wait_on_page_writeback(page, DATA, true); - if (clear_page_dirty_for_io(page)) + if (clear_page_dirty_for_io(page)) { inode_dec_dirty_pages(inode); + remove_dirty_inode(inode); + } set_cold_data(page); @@ -690,8 +766,6 @@ retry: congestion_wait(BLK_RW_ASYNC, HZ/50); goto retry; } - - clear_cold_data(page); } out: f2fs_put_page(page, 1); @@ -761,8 +835,7 @@ next_step: continue; /* if encrypted inode, let's go phase 3 */ - if (f2fs_encrypted_inode(inode) && - S_ISREG(inode->i_mode)) { + if (f2fs_encrypted_file(inode)) { add_gc_inode(gc_list, inode); continue; } @@ -796,14 +869,18 @@ next_step: continue; } locked = true; + + /* wait for all inflight aio data */ + inode_dio_wait(inode); } start_bidx = start_bidx_of_node(nofs, inode) + ofs_in_node; - if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) - move_encrypted_block(inode, start_bidx); + if (f2fs_encrypted_file(inode)) + move_data_block(inode, start_bidx, segno, off); else - move_data_page(inode, start_bidx, gc_type); + move_data_page(inode, start_bidx, gc_type, + segno, off); if (locked) { up_write(&fi->dio_rwsem[WRITE]); @@ -840,7 +917,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, struct blk_plug plug; unsigned int segno = start_segno; unsigned int end_segno = start_segno + sbi->segs_per_sec; - int sec_freed = 0; + int seg_freed = 0; unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ? SUM_TYPE_DATA : SUM_TYPE_NODE; @@ -864,7 +941,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, GET_SUM_BLOCK(sbi, segno)); f2fs_put_page(sum_page, 0); - if (get_valid_blocks(sbi, segno, 1) == 0 || + if (get_valid_blocks(sbi, segno, false) == 0 || !PageUptodate(sum_page) || unlikely(f2fs_cp_error(sbi))) goto next; @@ -879,7 +956,6 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, * - mutex_lock(sentry_lock) - change_curseg() * - lock_page(sum_page) */ - if (type == SUM_TYPE_NODE) gc_node_segment(sbi, sum->entries, segno, gc_type); else @@ -887,87 +963,113 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, gc_type); stat_inc_seg_count(sbi, type, gc_type); + + if (gc_type == FG_GC && + get_valid_blocks(sbi, segno, false) == 0) + seg_freed++; next: f2fs_put_page(sum_page, 0); } if (gc_type == FG_GC) - f2fs_submit_merged_bio(sbi, - (type == SUM_TYPE_NODE) ? NODE : DATA, WRITE); + f2fs_submit_merged_write(sbi, + (type == SUM_TYPE_NODE) ? NODE : DATA); blk_finish_plug(&plug); - if (gc_type == FG_GC && - get_valid_blocks(sbi, start_segno, sbi->segs_per_sec) == 0) - sec_freed = 1; - stat_inc_call_count(sbi->stat_info); - return sec_freed; + return seg_freed; } -int f2fs_gc(struct f2fs_sb_info *sbi, bool sync) +int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, + bool background, unsigned int segno) { - unsigned int segno; int gc_type = sync ? FG_GC : BG_GC; - int sec_freed = 0; - int ret = -EINVAL; + int sec_freed = 0, seg_freed = 0, total_freed = 0; + int ret = 0; struct cp_control cpc; + unsigned int init_segno = segno; struct gc_inode_list gc_list = { .ilist = LIST_HEAD_INIT(gc_list.ilist), .iroot = RADIX_TREE_INIT(GFP_NOFS), }; + trace_f2fs_gc_begin(sbi->sb, sync, background, + get_pages(sbi, F2FS_DIRTY_NODES), + get_pages(sbi, F2FS_DIRTY_DENTS), + get_pages(sbi, F2FS_DIRTY_IMETA), + free_sections(sbi), + free_segments(sbi), + reserved_segments(sbi), + prefree_segments(sbi)); + cpc.reason = __get_cp_reason(sbi); gc_more: - segno = NULL_SEGNO; - - if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) + if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) { + ret = -EINVAL; goto stop; + } if (unlikely(f2fs_cp_error(sbi))) { ret = -EIO; goto stop; } - if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed, 0)) { - gc_type = FG_GC; + if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) { /* - * If there is no victim and no prefree segment but still not - * enough free sections, we should flush dent/node blocks and do - * garbage collections. + * For example, if there are many prefree_segments below given + * threshold, we can make them free by checkpoint. Then, we + * secure free segments which doesn't need fggc any more. */ - if (__get_victim(sbi, &segno, gc_type) || - prefree_segments(sbi)) { - ret = write_checkpoint(sbi, &cpc); - if (ret) - goto stop; - segno = NULL_SEGNO; - } else if (has_not_enough_free_secs(sbi, 0, 0)) { + if (prefree_segments(sbi)) { ret = write_checkpoint(sbi, &cpc); if (ret) goto stop; } + if (has_not_enough_free_secs(sbi, 0, 0)) + gc_type = FG_GC; } - if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type)) + /* f2fs_balance_fs doesn't need to do BG_GC in critical path. */ + if (gc_type == BG_GC && !background) { + ret = -EINVAL; goto stop; - ret = 0; + } + if (!__get_victim(sbi, &segno, gc_type)) { + ret = -ENODATA; + goto stop; + } - if (do_garbage_collect(sbi, segno, &gc_list, gc_type) && - gc_type == FG_GC) + seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type); + if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec) sec_freed++; + total_freed += seg_freed; if (gc_type == FG_GC) sbi->cur_victim_sec = NULL_SEGNO; if (!sync) { - if (has_not_enough_free_secs(sbi, sec_freed, 0)) + if (has_not_enough_free_secs(sbi, sec_freed, 0)) { + segno = NULL_SEGNO; goto gc_more; + } if (gc_type == FG_GC) ret = write_checkpoint(sbi, &cpc); } stop: + SIT_I(sbi)->last_victim[ALLOC_NEXT] = 0; + SIT_I(sbi)->last_victim[FLUSH_DEVICE] = init_segno; + + trace_f2fs_gc_end(sbi->sb, ret, total_freed, sec_freed, + get_pages(sbi, F2FS_DIRTY_NODES), + get_pages(sbi, F2FS_DIRTY_DENTS), + get_pages(sbi, F2FS_DIRTY_IMETA), + free_sections(sbi), + free_segments(sbi), + reserved_segments(sbi), + prefree_segments(sbi)); + mutex_unlock(&sbi->gc_mutex); put_gc_inode(&gc_list); @@ -979,7 +1081,7 @@ stop: void build_gc_manager(struct f2fs_sb_info *sbi) { - u64 main_count, resv_count, ovp_count, blocks_per_sec; + u64 main_count, resv_count, ovp_count; DIRTY_I(sbi)->v_ops = &default_v_ops; @@ -987,8 +1089,12 @@ void build_gc_manager(struct f2fs_sb_info *sbi) main_count = SM_I(sbi)->main_segments << sbi->log_blocks_per_seg; resv_count = SM_I(sbi)->reserved_segments << sbi->log_blocks_per_seg; ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg; - blocks_per_sec = sbi->blocks_per_seg * sbi->segs_per_sec; - sbi->fggc_threshold = div_u64((main_count - ovp_count) * blocks_per_sec, - (main_count - resv_count)); + sbi->fggc_threshold = div64_u64((main_count - ovp_count) * + BLKS_PER_SEC(sbi), (main_count - resv_count)); + + /* give warm/cold data area from slower device */ + if (sbi->s_ndevs && sbi->segs_per_sec == 1) + SIT_I(sbi)->last_victim[ALLOC_NEXT] = + GET_SEGNO(sbi, FDEV(0).end_blk) + 1; } diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index a993967dcdb9..9325191fab2d 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -13,6 +13,7 @@ * whether IO subsystem is idle * or not */ +#define DEF_GC_THREAD_URGENT_SLEEP_TIME 500 /* 500 ms */ #define DEF_GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */ #define DEF_GC_THREAD_MAX_SLEEP_TIME 60000 #define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */ @@ -27,12 +28,15 @@ struct f2fs_gc_kthread { wait_queue_head_t gc_wait_queue_head; /* for gc sleep time */ + unsigned int urgent_sleep_time; unsigned int min_sleep_time; unsigned int max_sleep_time; unsigned int no_gc_sleep_time; /* for changing gc mode */ unsigned int gc_idle; + unsigned int gc_urgent; + unsigned int gc_wake; }; struct gc_inode_list { @@ -65,25 +69,32 @@ static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi) } static inline void increase_sleep_time(struct f2fs_gc_kthread *gc_th, - long *wait) + unsigned int *wait) { + unsigned int min_time = gc_th->min_sleep_time; + unsigned int max_time = gc_th->max_sleep_time; + if (*wait == gc_th->no_gc_sleep_time) return; - *wait += gc_th->min_sleep_time; - if (*wait > gc_th->max_sleep_time) - *wait = gc_th->max_sleep_time; + if ((long long)*wait + (long long)min_time > (long long)max_time) + *wait = max_time; + else + *wait += min_time; } static inline void decrease_sleep_time(struct f2fs_gc_kthread *gc_th, - long *wait) + unsigned int *wait) { + unsigned int min_time = gc_th->min_sleep_time; + if (*wait == gc_th->no_gc_sleep_time) *wait = gc_th->max_sleep_time; - *wait -= gc_th->min_sleep_time; - if (*wait <= gc_th->min_sleep_time) - *wait = gc_th->min_sleep_time; + if ((long long)*wait - (long long)min_time < (long long)min_time) + *wait = min_time; + else + *wait -= min_time; } static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index e14edc9d389a..9fb5fdc61888 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -23,10 +23,10 @@ bool f2fs_may_inline_data(struct inode *inode) if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode)) return false; - if (i_size_read(inode) > MAX_INLINE_DATA) + if (i_size_read(inode) > MAX_INLINE_DATA(inode)) return false; - if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) + if (f2fs_encrypted_file(inode)) return false; return true; @@ -45,6 +45,7 @@ bool f2fs_may_inline_dentry(struct inode *inode) void read_inline_data(struct page *page, struct page *ipage) { + struct inode *inode = page->mapping->host; void *src_addr, *dst_addr; if (PageUptodate(page)) @@ -52,31 +53,33 @@ void read_inline_data(struct page *page, struct page *ipage) f2fs_bug_on(F2FS_P_SB(page), page->index); - zero_user_segment(page, MAX_INLINE_DATA, PAGE_SIZE); + zero_user_segment(page, MAX_INLINE_DATA(inode), PAGE_SIZE); /* Copy the whole inline data block */ - src_addr = inline_data_addr(ipage); + src_addr = inline_data_addr(inode, ipage); dst_addr = kmap_atomic(page); - memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode)); flush_dcache_page(page); kunmap_atomic(dst_addr); if (!PageUptodate(page)) SetPageUptodate(page); } -bool truncate_inline_inode(struct page *ipage, u64 from) +void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from) { void *addr; - if (from >= MAX_INLINE_DATA) - return false; + if (from >= MAX_INLINE_DATA(inode)) + return; - addr = inline_data_addr(ipage); + addr = inline_data_addr(inode, ipage); f2fs_wait_on_page_writeback(ipage, NODE, true); - memset(addr + from, 0, MAX_INLINE_DATA - from); + memset(addr + from, 0, MAX_INLINE_DATA(inode) - from); set_page_dirty(ipage); - return true; + + if (from == 0) + clear_inode_flag(inode, FI_DATA_EXIST); } int f2fs_read_inline_data(struct inode *inode, struct page *page) @@ -129,9 +132,10 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) .sbi = F2FS_I_SB(dn->inode), .type = DATA, .op = REQ_OP_WRITE, - .op_flags = WRITE_SYNC | REQ_PRIO, + .op_flags = REQ_SYNC | REQ_PRIO, .page = page, .encrypted_page = NULL, + .io_type = FS_DATA_IO, }; int dirty, err; @@ -153,20 +157,23 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) /* write data page to try to make data consistent */ set_page_writeback(page); fio.old_blkaddr = dn->data_blkaddr; + set_inode_flag(dn->inode, FI_HOT_DATA); write_data_page(dn, &fio); f2fs_wait_on_page_writeback(page, DATA, true); - if (dirty) + if (dirty) { inode_dec_dirty_pages(dn->inode); + remove_dirty_inode(dn->inode); + } /* this converted inline_data should be recovered. */ set_inode_flag(dn->inode, FI_APPEND_WRITE); /* clear inline data and flag after data writeback */ - truncate_inline_inode(dn->inode_page, 0); + truncate_inline_inode(dn->inode, dn->inode_page, 0); clear_inline_node(dn->inode_page); clear_out: stat_dec_inline_inode(dn->inode); - f2fs_clear_inline_inode(dn->inode); + clear_inode_flag(dn->inode, FI_INLINE_DATA); f2fs_put_dnode(dn); return 0; } @@ -213,6 +220,8 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) { void *src_addr, *dst_addr; struct dnode_of_data dn; + struct address_space *mapping = page_mapping(page); + unsigned long flags; int err; set_new_dnode(&dn, inode, NULL, NULL, 0); @@ -229,11 +238,16 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) f2fs_wait_on_page_writeback(dn.inode_page, NODE, true); src_addr = kmap_atomic(page); - dst_addr = inline_data_addr(dn.inode_page); - memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + dst_addr = inline_data_addr(inode, dn.inode_page); + memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode)); kunmap_atomic(src_addr); set_page_dirty(dn.inode_page); + spin_lock_irqsave(&mapping->tree_lock, flags); + radix_tree_tag_clear(&mapping->page_tree, page_index(page), + PAGECACHE_TAG_DIRTY); + spin_unlock_irqrestore(&mapping->tree_lock, flags); + set_inode_flag(inode, FI_APPEND_WRITE); set_inode_flag(inode, FI_DATA_EXIST); @@ -268,9 +282,9 @@ process_inline: f2fs_wait_on_page_writeback(ipage, NODE, true); - src_addr = inline_data_addr(npage); - dst_addr = inline_data_addr(ipage); - memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + src_addr = inline_data_addr(inode, npage); + dst_addr = inline_data_addr(inode, ipage); + memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode)); set_inode_flag(inode, FI_INLINE_DATA); set_inode_flag(inode, FI_DATA_EXIST); @@ -283,9 +297,8 @@ process_inline: if (f2fs_has_inline_data(inode)) { ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(sbi, IS_ERR(ipage)); - if (!truncate_inline_inode(ipage, 0)) - return false; - f2fs_clear_inline_inode(inode); + truncate_inline_inode(inode, ipage, 0); + clear_inode_flag(inode, FI_INLINE_DATA); f2fs_put_page(ipage, 1); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { if (truncate_blocks(inode, 0, false)) @@ -299,11 +312,11 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, struct fscrypt_name *fname, struct page **res_page) { struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); - struct f2fs_inline_dentry *inline_dentry; struct qstr name = FSTR_TO_QSTR(&fname->disk_name); struct f2fs_dir_entry *de; struct f2fs_dentry_ptr d; struct page *ipage; + void *inline_dentry; f2fs_hash_t namehash; ipage = get_node_page(sbi, dir->i_ino); @@ -314,9 +327,9 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, namehash = f2fs_dentry_hash(&name, fname); - inline_dentry = inline_data_addr(ipage); + inline_dentry = inline_data_addr(dir, ipage); - make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2); + make_dentry_ptr_inline(dir, &d, inline_dentry); de = find_target_dentry(fname, namehash, NULL, &d); unlock_page(ipage); if (de) @@ -330,19 +343,19 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, int make_empty_inline_dir(struct inode *inode, struct inode *parent, struct page *ipage) { - struct f2fs_inline_dentry *dentry_blk; struct f2fs_dentry_ptr d; + void *inline_dentry; - dentry_blk = inline_data_addr(ipage); + inline_dentry = inline_data_addr(inode, ipage); - make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2); + make_dentry_ptr_inline(inode, &d, inline_dentry); do_make_empty_dir(inode, parent, &d); set_page_dirty(ipage); /* update i_size to MAX_INLINE_DATA */ - if (i_size_read(inode) < MAX_INLINE_DATA) - f2fs_i_size_write(inode, MAX_INLINE_DATA); + if (i_size_read(inode) < MAX_INLINE_DATA(inode)) + f2fs_i_size_write(inode, MAX_INLINE_DATA(inode)); return 0; } @@ -351,11 +364,12 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent, * release ipage in this function. */ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, - struct f2fs_inline_dentry *inline_dentry) + void *inline_dentry) { struct page *page; struct dnode_of_data dn; struct f2fs_dentry_block *dentry_blk; + struct f2fs_dentry_ptr src, dst; int err; page = f2fs_grab_cache_page(dir->i_mapping, 0, false); @@ -370,25 +384,24 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, goto out; f2fs_wait_on_page_writeback(page, DATA, true); - zero_user_segment(page, MAX_INLINE_DATA, PAGE_SIZE); + zero_user_segment(page, MAX_INLINE_DATA(dir), PAGE_SIZE); dentry_blk = kmap_atomic(page); + make_dentry_ptr_inline(dir, &src, inline_dentry); + make_dentry_ptr_block(dir, &dst, dentry_blk); + /* copy data from inline dentry block to new dentry block */ - memcpy(dentry_blk->dentry_bitmap, inline_dentry->dentry_bitmap, - INLINE_DENTRY_BITMAP_SIZE); - memset(dentry_blk->dentry_bitmap + INLINE_DENTRY_BITMAP_SIZE, 0, - SIZE_OF_DENTRY_BITMAP - INLINE_DENTRY_BITMAP_SIZE); + memcpy(dst.bitmap, src.bitmap, src.nr_bitmap); + memset(dst.bitmap + src.nr_bitmap, 0, dst.nr_bitmap - src.nr_bitmap); /* * we do not need to zero out remainder part of dentry and filename * field, since we have used bitmap for marking the usage status of * them, besides, we can also ignore copying/zeroing reserved space * of dentry block, because them haven't been used so far. */ - memcpy(dentry_blk->dentry, inline_dentry->dentry, - sizeof(struct f2fs_dir_entry) * NR_INLINE_DENTRY); - memcpy(dentry_blk->filename, inline_dentry->filename, - NR_INLINE_DENTRY * F2FS_SLOT_LEN); + memcpy(dst.dentry, src.dentry, SIZE_OF_DIR_ENTRY * src.max); + memcpy(dst.filename, src.filename, src.max * F2FS_SLOT_LEN); kunmap_atomic(dentry_blk); if (!PageUptodate(page)) @@ -396,7 +409,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, set_page_dirty(page); /* clear inline dir and flag after data writeback */ - truncate_inline_inode(ipage, 0); + truncate_inline_inode(dir, ipage, 0); stat_dec_inline_dir(dir); clear_inode_flag(dir, FI_INLINE_DENTRY); @@ -409,14 +422,13 @@ out: return err; } -static int f2fs_add_inline_entries(struct inode *dir, - struct f2fs_inline_dentry *inline_dentry) +static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry) { struct f2fs_dentry_ptr d; unsigned long bit_pos = 0; int err = 0; - make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2); + make_dentry_ptr_inline(dir, &d, inline_dentry); while (bit_pos < d.max) { struct f2fs_dir_entry *de; @@ -437,7 +449,7 @@ static int f2fs_add_inline_entries(struct inode *dir, } new_name.name = d.filename[bit_pos]; - new_name.len = de->name_len; + new_name.len = le16_to_cpu(de->name_len); ino = le32_to_cpu(de->ino); fake_mode = get_de_type(de) << S_SHIFT; @@ -458,20 +470,20 @@ punch_dentry_pages: } static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, - struct f2fs_inline_dentry *inline_dentry) + void *inline_dentry) { - struct f2fs_inline_dentry *backup_dentry; + void *backup_dentry; int err; backup_dentry = f2fs_kmalloc(F2FS_I_SB(dir), - sizeof(struct f2fs_inline_dentry), GFP_F2FS_ZERO); + MAX_INLINE_DATA(dir), GFP_F2FS_ZERO); if (!backup_dentry) { f2fs_put_page(ipage, 1); return -ENOMEM; } - memcpy(backup_dentry, inline_dentry, MAX_INLINE_DATA); - truncate_inline_inode(ipage, 0); + memcpy(backup_dentry, inline_dentry, MAX_INLINE_DATA(dir)); + truncate_inline_inode(dir, ipage, 0); unlock_page(ipage); @@ -487,9 +499,9 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, return 0; recover: lock_page(ipage); - memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA); + memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA(dir)); f2fs_i_depth_write(dir, 0); - f2fs_i_size_write(dir, MAX_INLINE_DATA); + f2fs_i_size_write(dir, MAX_INLINE_DATA(dir)); set_page_dirty(ipage); f2fs_put_page(ipage, 1); @@ -498,7 +510,7 @@ recover: } static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, - struct f2fs_inline_dentry *inline_dentry) + void *inline_dentry) { if (!F2FS_I(dir)->i_dir_level) return f2fs_move_inline_dirents(dir, ipage, inline_dentry); @@ -514,7 +526,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, struct page *ipage; unsigned int bit_pos; f2fs_hash_t name_hash; - struct f2fs_inline_dentry *dentry_blk = NULL; + void *inline_dentry = NULL; struct f2fs_dentry_ptr d; int slots = GET_DENTRY_SLOTS(new_name->len); struct page *page = NULL; @@ -524,11 +536,12 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, if (IS_ERR(ipage)) return PTR_ERR(ipage); - dentry_blk = inline_data_addr(ipage); - bit_pos = room_for_filename(&dentry_blk->dentry_bitmap, - slots, NR_INLINE_DENTRY); - if (bit_pos >= NR_INLINE_DENTRY) { - err = f2fs_convert_inline_dir(dir, ipage, dentry_blk); + inline_dentry = inline_data_addr(dir, ipage); + make_dentry_ptr_inline(dir, &d, inline_dentry); + + bit_pos = room_for_filename(d.bitmap, slots, d.max); + if (bit_pos >= d.max) { + err = f2fs_convert_inline_dir(dir, ipage, inline_dentry); if (err) return err; err = -EAGAIN; @@ -543,14 +556,11 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, err = PTR_ERR(page); goto fail; } - if (f2fs_encrypted_inode(dir)) - file_set_enc_name(inode); } f2fs_wait_on_page_writeback(ipage, NODE, true); name_hash = f2fs_dentry_hash(new_name, NULL); - make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2); f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos); set_page_dirty(ipage); @@ -573,7 +583,8 @@ out: void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, struct inode *dir, struct inode *inode) { - struct f2fs_inline_dentry *inline_dentry; + struct f2fs_dentry_ptr d; + void *inline_dentry; int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); unsigned int bit_pos; int i; @@ -581,17 +592,18 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, lock_page(page); f2fs_wait_on_page_writeback(page, NODE, true); - inline_dentry = inline_data_addr(page); - bit_pos = dentry - inline_dentry->dentry; + inline_dentry = inline_data_addr(dir, page); + make_dentry_ptr_inline(dir, &d, inline_dentry); + + bit_pos = dentry - d.dentry; for (i = 0; i < slots; i++) - __clear_bit_le(bit_pos + i, - &inline_dentry->dentry_bitmap); + __clear_bit_le(bit_pos + i, d.bitmap); set_page_dirty(page); f2fs_put_page(page, 1); dir->i_ctime = dir->i_mtime = current_time(dir); - f2fs_mark_inode_dirty_sync(dir); + f2fs_mark_inode_dirty_sync(dir, false); if (inode) f2fs_drop_nlink(dir, inode); @@ -602,20 +614,21 @@ bool f2fs_empty_inline_dir(struct inode *dir) struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct page *ipage; unsigned int bit_pos = 2; - struct f2fs_inline_dentry *dentry_blk; + void *inline_dentry; + struct f2fs_dentry_ptr d; ipage = get_node_page(sbi, dir->i_ino); if (IS_ERR(ipage)) return false; - dentry_blk = inline_data_addr(ipage); - bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, - NR_INLINE_DENTRY, - bit_pos); + inline_dentry = inline_data_addr(dir, ipage); + make_dentry_ptr_inline(dir, &d, inline_dentry); + + bit_pos = find_next_bit_le(d.bitmap, d.max, bit_pos); f2fs_put_page(ipage, 1); - if (bit_pos < NR_INLINE_DENTRY) + if (bit_pos < d.max) return false; return true; @@ -625,26 +638,30 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, struct fscrypt_str *fstr) { struct inode *inode = file_inode(file); - struct f2fs_inline_dentry *inline_dentry = NULL; struct page *ipage = NULL; struct f2fs_dentry_ptr d; + void *inline_dentry = NULL; + int err; - if (ctx->pos == NR_INLINE_DENTRY) + make_dentry_ptr_inline(inode, &d, inline_dentry); + + if (ctx->pos == d.max) return 0; ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); if (IS_ERR(ipage)) return PTR_ERR(ipage); - inline_dentry = inline_data_addr(ipage); + inline_dentry = inline_data_addr(inode, ipage); - make_dentry_ptr(inode, &d, (void *)inline_dentry, 2); + make_dentry_ptr_inline(inode, &d, inline_dentry); - if (!f2fs_fill_dentries(ctx, &d, 0, fstr)) - ctx->pos = NR_INLINE_DENTRY; + err = f2fs_fill_dentries(ctx, &d, 0, fstr); + if (!err) + ctx->pos = d.max; f2fs_put_page(ipage, 1); - return 0; + return err < 0 ? err : 0; } int f2fs_inline_data_fiemap(struct inode *inode, @@ -666,7 +683,7 @@ int f2fs_inline_data_fiemap(struct inode *inode, goto out; } - ilen = min_t(size_t, MAX_INLINE_DATA, i_size_read(inode)); + ilen = min_t(size_t, MAX_INLINE_DATA(inode), i_size_read(inode)); if (start >= ilen) goto out; if (start + len < ilen) @@ -675,7 +692,8 @@ int f2fs_inline_data_fiemap(struct inode *inode, get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni); byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits; - byteaddr += (char *)inline_data_addr(ipage) - (char *)F2FS_INODE(ipage); + byteaddr += (char *)inline_data_addr(inode, ipage) - + (char *)F2FS_INODE(ipage); err = fiemap_fill_next_extent(fieinfo, start, byteaddr, ilen, flags); out: f2fs_put_page(ipage, 1); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index d7369895a78a..50c88e37ed66 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -16,13 +16,15 @@ #include "f2fs.h" #include "node.h" +#include "segment.h" #include -void f2fs_mark_inode_dirty_sync(struct inode *inode) +void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync) { - if (f2fs_inode_dirtied(inode)) + if (f2fs_inode_dirtied(inode, sync)) return; + mark_inode_dirty_sync(inode); } @@ -43,25 +45,26 @@ void f2fs_set_inode_flags(struct inode *inode) new_fl |= S_DIRSYNC; inode_set_flags(inode, new_fl, S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); - f2fs_mark_inode_dirty_sync(inode); } static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) { + int extra_size = get_extra_isize(inode); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { - if (ri->i_addr[0]) - inode->i_rdev = - old_decode_dev(le32_to_cpu(ri->i_addr[0])); + if (ri->i_addr[extra_size]) + inode->i_rdev = old_decode_dev( + le32_to_cpu(ri->i_addr[extra_size])); else - inode->i_rdev = - new_decode_dev(le32_to_cpu(ri->i_addr[1])); + inode->i_rdev = new_decode_dev( + le32_to_cpu(ri->i_addr[extra_size + 1])); } } static bool __written_first_block(struct f2fs_inode *ri) { - block_t addr = le32_to_cpu(ri->i_addr[0]); + block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]); if (addr != NEW_ADDR && addr != NULL_ADDR) return true; @@ -70,25 +73,27 @@ static bool __written_first_block(struct f2fs_inode *ri) static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) { + int extra_size = get_extra_isize(inode); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { if (old_valid_dev(inode->i_rdev)) { - ri->i_addr[0] = + ri->i_addr[extra_size] = cpu_to_le32(old_encode_dev(inode->i_rdev)); - ri->i_addr[1] = 0; + ri->i_addr[extra_size + 1] = 0; } else { - ri->i_addr[0] = 0; - ri->i_addr[1] = + ri->i_addr[extra_size] = 0; + ri->i_addr[extra_size + 1] = cpu_to_le32(new_encode_dev(inode->i_rdev)); - ri->i_addr[2] = 0; + ri->i_addr[extra_size + 2] = 0; } } } static void __recover_inline_status(struct inode *inode, struct page *ipage) { - void *inline_data = inline_data_addr(ipage); + void *inline_data = inline_data_addr(inode, ipage); __le32 *start = inline_data; - __le32 *end = start + MAX_INLINE_DATA / sizeof(__le32); + __le32 *end = start + MAX_INLINE_DATA(inode) / sizeof(__le32); while (start < end) { if (*start++) { @@ -103,12 +108,84 @@ static void __recover_inline_status(struct inode *inode, struct page *ipage) return; } +static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page) +{ + struct f2fs_inode *ri = &F2FS_NODE(page)->i; + int extra_isize = le32_to_cpu(ri->i_extra_isize); + + if (!f2fs_sb_has_inode_chksum(sbi->sb)) + return false; + + if (!RAW_IS_INODE(F2FS_NODE(page)) || !(ri->i_inline & F2FS_EXTRA_ATTR)) + return false; + + if (!F2FS_FITS_IN_INODE(ri, extra_isize, i_inode_checksum)) + return false; + + return true; +} + +static __u32 f2fs_inode_chksum(struct f2fs_sb_info *sbi, struct page *page) +{ + struct f2fs_node *node = F2FS_NODE(page); + struct f2fs_inode *ri = &node->i; + __le32 ino = node->footer.ino; + __le32 gen = ri->i_generation; + __u32 chksum, chksum_seed; + __u32 dummy_cs = 0; + unsigned int offset = offsetof(struct f2fs_inode, i_inode_checksum); + unsigned int cs_size = sizeof(dummy_cs); + + chksum = f2fs_chksum(sbi, sbi->s_chksum_seed, (__u8 *)&ino, + sizeof(ino)); + chksum_seed = f2fs_chksum(sbi, chksum, (__u8 *)&gen, sizeof(gen)); + + chksum = f2fs_chksum(sbi, chksum_seed, (__u8 *)ri, offset); + chksum = f2fs_chksum(sbi, chksum, (__u8 *)&dummy_cs, cs_size); + offset += cs_size; + chksum = f2fs_chksum(sbi, chksum, (__u8 *)ri + offset, + F2FS_BLKSIZE - offset); + return chksum; +} + +bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page) +{ + struct f2fs_inode *ri; + __u32 provided, calculated; + + if (!f2fs_enable_inode_chksum(sbi, page) || + PageDirty(page) || PageWriteback(page)) + return true; + + ri = &F2FS_NODE(page)->i; + provided = le32_to_cpu(ri->i_inode_checksum); + calculated = f2fs_inode_chksum(sbi, page); + + if (provided != calculated) + f2fs_msg(sbi->sb, KERN_WARNING, + "checksum invalid, ino = %x, %x vs. %x", + ino_of_node(page), provided, calculated); + + return provided == calculated; +} + +void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page) +{ + struct f2fs_inode *ri = &F2FS_NODE(page)->i; + + if (!f2fs_enable_inode_chksum(sbi, page)) + return; + + ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page)); +} + static int do_read_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); struct page *node_page; struct f2fs_inode *ri; + projid_t i_projid; /* Check if ino is within scope */ if (check_nid_range(sbi, inode->i_ino)) { @@ -129,7 +206,7 @@ static int do_read_inode(struct inode *inode) i_gid_write(inode, le32_to_cpu(ri->i_gid)); set_nlink(inode, le32_to_cpu(ri->i_links)); inode->i_size = le64_to_cpu(ri->i_size); - inode->i_blocks = le64_to_cpu(ri->i_blocks); + inode->i_blocks = SECTOR_FROM_BLOCK(le64_to_cpu(ri->i_blocks) - 1); inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime); inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime); @@ -152,6 +229,9 @@ static int do_read_inode(struct inode *inode) get_inline_info(inode, ri); + fi->i_extra_isize = f2fs_has_extra_attr(inode) ? + le16_to_cpu(ri->i_extra_isize) : 0; + /* check data exist */ if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) __recover_inline_status(inode, node_page); @@ -165,6 +245,16 @@ static int do_read_inode(struct inode *inode) if (!need_inode_block_update(sbi, inode->i_ino)) fi->last_disk_size = inode->i_size; + if (fi->i_flags & FS_PROJINHERIT_FL) + set_inode_flag(inode, FI_PROJ_INHERIT); + + if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi->sb) && + F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid)) + i_projid = (projid_t)le32_to_cpu(ri->i_projid); + else + i_projid = F2FS_DEF_PROJID; + fi->i_projid = make_kprojid(&init_user_ns, i_projid); + f2fs_put_page(node_page, 1); stat_inc_inline_xattr(inode); @@ -225,6 +315,7 @@ make_now: ret = -EIO; goto bad_inode; } + f2fs_set_inode_flags(inode); unlock_new_inode(inode); trace_f2fs_iget(inode); return inode; @@ -252,6 +343,7 @@ retry: int update_inode(struct inode *inode, struct page *node_page) { struct f2fs_inode *ri; + struct extent_tree *et = F2FS_I(inode)->extent_tree; f2fs_inode_synced(inode); @@ -265,13 +357,15 @@ int update_inode(struct inode *inode, struct page *node_page) ri->i_gid = cpu_to_le32(i_gid_read(inode)); ri->i_links = cpu_to_le32(inode->i_nlink); ri->i_size = cpu_to_le64(i_size_read(inode)); - ri->i_blocks = cpu_to_le64(inode->i_blocks); + ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(inode->i_blocks) + 1); - if (F2FS_I(inode)->extent_tree) - set_raw_extent(&F2FS_I(inode)->extent_tree->largest, - &ri->i_ext); - else + if (et) { + read_lock(&et->lock); + set_raw_extent(&et->largest, &ri->i_ext); + read_unlock(&et->lock); + } else { memset(&ri->i_ext, 0, sizeof(ri->i_ext)); + } set_raw_inline(inode, ri); ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); @@ -287,6 +381,20 @@ int update_inode(struct inode *inode, struct page *node_page) ri->i_generation = cpu_to_le32(inode->i_generation); ri->i_dir_level = F2FS_I(inode)->i_dir_level; + if (f2fs_has_extra_attr(inode)) { + ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize); + + if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) && + F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, + i_projid)) { + projid_t i_projid; + + i_projid = from_kprojid(&init_user_ns, + F2FS_I(inode)->i_projid); + ri->i_projid = cpu_to_le32(i_projid); + } + } + __set_inode_rdev(inode, ri); set_cold_node(inode, node_page); @@ -312,7 +420,6 @@ retry: } else if (err != -ENOENT) { f2fs_stop_checkpoint(sbi, false); } - f2fs_inode_synced(inode); return 0; } ret = update_inode(inode, node_page); @@ -335,7 +442,8 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) * We need to balance fs here to prevent from producing dirty node pages * during the urgent cleaning time when runing out of free sections. */ - if (update_inode_page(inode)) + update_inode_page(inode); + if (wbc && wbc->nr_to_write) f2fs_balance_fs(sbi, true); return 0; } @@ -368,10 +476,10 @@ void f2fs_evict_inode(struct inode *inode) if (inode->i_nlink || is_bad_inode(inode)) goto no_delete; -#ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(sbi, FAULT_EVICT_INODE)) - goto no_delete; -#endif + dquot_initialize(inode); + + remove_ino_entry(sbi, inode->i_ino, APPEND_INO); + remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); sb_start_intwrite(inode->i_sb); set_inode_flag(inode, FI_NO_ALLOC); @@ -380,10 +488,18 @@ retry: if (F2FS_HAS_BLOCKS(inode)) err = f2fs_truncate(inode); +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(sbi, FAULT_EVICT_INODE)) { + f2fs_show_injection_info(FAULT_EVICT_INODE); + err = -EIO; + } +#endif if (!err) { f2fs_lock_op(sbi); err = remove_inode_page(inode); f2fs_unlock_op(sbi); + if (err == -ENOENT) + err = 0; } /* give more chances, if ENOMEM case */ @@ -394,25 +510,37 @@ retry: if (err) update_inode_page(inode); + dquot_free_inode(inode); sb_end_intwrite(inode->i_sb); no_delete: + dquot_drop(inode); + stat_dec_inline_xattr(inode); stat_dec_inline_dir(inode); stat_dec_inline_inode(inode); - invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); + if (!is_set_ckpt_flags(sbi, CP_ERROR_FLAG)) + f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE)); + + /* ino == 0, if f2fs_new_inode() was failed t*/ + if (inode->i_ino) + invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, + inode->i_ino); if (xnid) invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); - if (is_inode_flag_set(inode, FI_APPEND_WRITE)) - add_ino_entry(sbi, inode->i_ino, APPEND_INO); - if (is_inode_flag_set(inode, FI_UPDATE_WRITE)) - add_ino_entry(sbi, inode->i_ino, UPDATE_INO); + if (inode->i_nlink) { + if (is_inode_flag_set(inode, FI_APPEND_WRITE)) + add_ino_entry(sbi, inode->i_ino, APPEND_INO); + if (is_inode_flag_set(inode, FI_UPDATE_WRITE)) + add_ino_entry(sbi, inode->i_ino, UPDATE_INO); + } if (is_inode_flag_set(inode, FI_FREE_NID)) { alloc_nid_failed(sbi, inode->i_ino); clear_inode_flag(inode, FI_FREE_NID); + } else { + f2fs_bug_on(sbi, err && + !exist_written_data(sbi, inode->i_ino, ORPHAN_INO)); } - f2fs_bug_on(sbi, err && - !exist_written_data(sbi, inode->i_ino, ORPHAN_INO)); out_clear: fscrypt_put_encryption_info(inode, NULL); clear_inode(inode); @@ -424,6 +552,19 @@ void handle_failed_inode(struct inode *inode) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct node_info ni; + /* + * clear nlink of inode in order to release resource of inode + * immediately. + */ + clear_nlink(inode); + + /* + * we must call this to avoid inode being remained as dirty, resulting + * in a panic when flushing dirty inodes in gdirty_list. + */ + update_inode_page(inode); + f2fs_inode_synced(inode); + /* don't make bad inode, since it becomes a regular file. */ unlock_new_inode(inode); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 489fa0d5f914..87440ab5e51b 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -42,6 +43,8 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) } f2fs_unlock_op(sbi); + nid_free = true; + inode_init_owner(inode, dir, mode); inode->i_ino = ino; @@ -52,16 +55,35 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) err = insert_inode_locked(inode); if (err) { err = -EINVAL; - nid_free = true; goto fail; } + if (f2fs_sb_has_project_quota(sbi->sb) && + (F2FS_I(dir)->i_flags & FS_PROJINHERIT_FL)) + F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid; + else + F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns, + F2FS_DEF_PROJID); + + err = dquot_initialize(inode); + if (err) + goto fail_drop; + + err = dquot_alloc_inode(inode); + if (err) + goto fail_drop; + /* If the directory encrypted, then we should encrypt the inode. */ if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) f2fs_set_encrypted_inode(inode); set_inode_flag(inode, FI_NEW_INODE); + if (f2fs_sb_has_extra_attr(sbi->sb)) { + set_inode_flag(inode, FI_EXTRA_ATTR); + F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE; + } + if (test_opt(sbi, INLINE_XATTR)) set_inode_flag(inode, FI_INLINE_XATTR); if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode)) @@ -75,6 +97,15 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) stat_inc_inline_inode(inode); stat_inc_inline_dir(inode); + F2FS_I(inode)->i_flags = + f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED); + + if (S_ISDIR(inode->i_mode)) + F2FS_I(inode)->i_flags |= FS_INDEX_FL; + + if (F2FS_I(inode)->i_flags & FS_PROJINHERIT_FL) + set_inode_flag(inode, FI_PROJ_INHERIT); + trace_f2fs_new_inode(inode, 0); return inode; @@ -85,6 +116,16 @@ fail: set_inode_flag(inode, FI_FREE_NID); iput(inode); return ERR_PTR(err); +fail_drop: + trace_f2fs_new_inode(inode, err); + dquot_drop(inode); + inode->i_flags |= S_NOQUOTA; + if (nid_free) + set_inode_flag(inode, FI_FREE_NID); + clear_nlink(inode); + unlock_new_inode(inode); + iput(inode); + return ERR_PTR(err); } static int is_multimedia_file(const unsigned char *s, const char *sub) @@ -136,6 +177,10 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, nid_t ino = 0; int err; + err = dquot_initialize(dir); + if (err) + return err; + inode = f2fs_new_inode(dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -148,8 +193,6 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, inode->i_mapping->a_ops = &f2fs_dblock_aops; ino = inode->i_ino; - f2fs_balance_fs(sbi, true); - f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) @@ -163,6 +206,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); + + f2fs_balance_fs(sbi, true); return 0; out: handle_failed_inode(inode); @@ -180,6 +225,15 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, !fscrypt_has_permitted_context(dir, inode)) return -EPERM; + if (is_inode_flag_set(dir, FI_PROJ_INHERIT) && + (!projid_eq(F2FS_I(dir)->i_projid, + F2FS_I(old_dentry->d_inode)->i_projid))) + return -EXDEV; + + err = dquot_initialize(dir); + if (err) + return err; + f2fs_balance_fs(sbi, true); inode->i_ctime = current_time(inode); @@ -233,6 +287,10 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) return 0; } + err = dquot_initialize(dir); + if (err) + return err; + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); @@ -321,12 +379,13 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, if (err) goto err_out; } - if (!IS_ERR(inode) && f2fs_encrypted_inode(dir) && - (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && - !fscrypt_has_permitted_context(dir, inode)) { - bool nokey = f2fs_encrypted_inode(inode) && - !fscrypt_has_encryption_key(inode); - err = nokey ? -ENOKEY : -EPERM; + if (f2fs_encrypted_inode(dir) && + (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && + !fscrypt_has_permitted_context(dir, inode)) { + f2fs_msg(inode->i_sb, KERN_WARNING, + "Inconsistent encryption contexts: %lu/%lu", + dir->i_ino, inode->i_ino); + err = -EPERM; goto err_out; } return d_splice_alias(inode, dentry); @@ -346,6 +405,10 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) trace_f2fs_unlink_enter(dir, dentry); + err = dquot_initialize(dir); + if (err) + return err; + de = f2fs_find_entry(dir, &dentry->d_name, &page); if (!de) { if (IS_ERR(page)) @@ -403,7 +466,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, return err; if (!fscrypt_has_encryption_key(dir)) - return -EPERM; + return -ENOKEY; disk_link.len = (fscrypt_fname_encrypted_size(dir, len) + sizeof(struct fscrypt_symlink_data)); @@ -412,6 +475,10 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, if (disk_link.len > dir->i_sb->s_blocksize) return -ENAMETOOLONG; + err = dquot_initialize(dir); + if (err) + return err; + inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -423,8 +490,6 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, inode_nohighmem(inode); inode->i_mapping->a_ops = &f2fs_dblock_aops; - f2fs_balance_fs(sbi, true); - f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) @@ -447,7 +512,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, goto err_out; if (!fscrypt_has_encryption_key(inode)) { - err = -EPERM; + err = -ENOKEY; goto err_out; } @@ -487,6 +552,8 @@ err_out: } kfree(sd); + + f2fs_balance_fs(sbi, true); return err; out: handle_failed_inode(inode); @@ -499,6 +566,10 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct inode *inode; int err; + err = dquot_initialize(dir); + if (err) + return err; + inode = f2fs_new_inode(dir, S_IFDIR | mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -508,8 +579,6 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_mapping->a_ops = &f2fs_dblock_aops; mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); - f2fs_balance_fs(sbi, true); - set_inode_flag(inode, FI_INC_LINK); f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); @@ -524,6 +593,8 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); + + f2fs_balance_fs(sbi, true); return 0; out_fail: @@ -547,6 +618,10 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, struct inode *inode; int err = 0; + err = dquot_initialize(dir); + if (err) + return err; + inode = f2fs_new_inode(dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -554,8 +629,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, init_special_inode(inode, inode->i_mode, rdev); inode->i_op = &f2fs_special_inode_operations; - f2fs_balance_fs(sbi, true); - f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) @@ -569,6 +642,8 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); + + f2fs_balance_fs(sbi, true); return 0; out: handle_failed_inode(inode); @@ -582,6 +657,10 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, struct inode *inode; int err; + err = dquot_initialize(dir); + if (err) + return err; + inode = f2fs_new_inode(dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -595,8 +674,6 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, inode->i_mapping->a_ops = &f2fs_dblock_aops; } - f2fs_balance_fs(sbi, true); - f2fs_lock_op(sbi); err = acquire_orphan_inode(sbi); if (err) @@ -622,6 +699,8 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, /* link_count was changed by d_tmpfile as well. */ f2fs_unlock_op(sbi); unlock_new_inode(inode); + + f2fs_balance_fs(sbi, true); return 0; release_out: @@ -663,12 +742,31 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, bool is_old_inline = f2fs_has_inline_dentry(old_dir); int err = -ENOENT; + if ((f2fs_encrypted_inode(old_dir) && + !fscrypt_has_encryption_key(old_dir)) || + (f2fs_encrypted_inode(new_dir) && + !fscrypt_has_encryption_key(new_dir))) + return -ENOKEY; + if ((old_dir != new_dir) && f2fs_encrypted_inode(new_dir) && !fscrypt_has_permitted_context(new_dir, old_inode)) { err = -EPERM; goto out; } + if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && + (!projid_eq(F2FS_I(new_dir)->i_projid, + F2FS_I(old_dentry->d_inode)->i_projid))) + return -EXDEV; + + err = dquot_initialize(old_dir); + if (err) + goto out; + + err = dquot_initialize(new_dir); + if (err) + goto out; + old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) @@ -714,13 +812,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (err) goto put_out_dir; - err = update_dent_inode(old_inode, new_inode, - &new_dentry->d_name); - if (err) { - release_orphan_inode(sbi); - goto put_out_dir; - } - f2fs_set_link(new_dir, new_entry, new_page, old_inode); new_inode->i_ctime = current_time(new_inode); @@ -772,13 +863,14 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } down_write(&F2FS_I(old_inode)->i_sem); - file_lost_pino(old_inode); - if (new_inode && file_enc_name(new_inode)) - file_set_enc_name(old_inode); + if (!old_dir_entry || whiteout) + file_lost_pino(old_inode); + else + F2FS_I(old_inode)->i_pino = new_dir->i_ino; up_write(&F2FS_I(old_inode)->i_sem); old_inode->i_ctime = current_time(old_inode); - f2fs_mark_inode_dirty_sync(old_inode); + f2fs_mark_inode_dirty_sync(old_inode, false); f2fs_delete_entry(old_entry, old_page, old_dir, NULL); @@ -843,12 +935,34 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, int old_nlink = 0, new_nlink = 0; int err = -ENOENT; + if ((f2fs_encrypted_inode(old_dir) && + !fscrypt_has_encryption_key(old_dir)) || + (f2fs_encrypted_inode(new_dir) && + !fscrypt_has_encryption_key(new_dir))) + return -ENOKEY; + if ((f2fs_encrypted_inode(old_dir) || f2fs_encrypted_inode(new_dir)) && (old_dir != new_dir) && (!fscrypt_has_permitted_context(new_dir, old_inode) || !fscrypt_has_permitted_context(old_dir, new_inode))) return -EPERM; + if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && + !projid_eq(F2FS_I(new_dir)->i_projid, + F2FS_I(old_dentry->d_inode)->i_projid)) || + (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && + !projid_eq(F2FS_I(old_dir)->i_projid, + F2FS_I(new_dentry->d_inode)->i_projid))) + return -EXDEV; + + err = dquot_initialize(old_dir); + if (err) + goto out; + + err = dquot_initialize(new_dir); + if (err) + goto out; + old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) @@ -896,8 +1010,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, old_nlink = old_dir_entry ? -1 : 1; new_nlink = -old_nlink; err = -EMLINK; - if ((old_nlink > 0 && old_inode->i_nlink >= F2FS_LINK_MAX) || - (new_nlink > 0 && new_inode->i_nlink >= F2FS_LINK_MAX)) + if ((old_nlink > 0 && old_dir->i_nlink >= F2FS_LINK_MAX) || + (new_nlink > 0 && new_dir->i_nlink >= F2FS_LINK_MAX)) goto out_new_dir; } @@ -905,18 +1019,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_lock_op(sbi); - err = update_dent_inode(old_inode, new_inode, &new_dentry->d_name); - if (err) - goto out_unlock; - if (file_enc_name(new_inode)) - file_set_enc_name(old_inode); - - err = update_dent_inode(new_inode, old_inode, &old_dentry->d_name); - if (err) - goto out_undo; - if (file_enc_name(old_inode)) - file_set_enc_name(new_inode); - /* update ".." directory entry info of old dentry */ if (old_dir_entry) f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir); @@ -938,7 +1040,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_i_links_write(old_dir, old_nlink > 0); up_write(&F2FS_I(old_dir)->i_sem); } - f2fs_mark_inode_dirty_sync(old_dir); + f2fs_mark_inode_dirty_sync(old_dir, false); /* update directory entry info of new dir inode */ f2fs_set_link(new_dir, new_entry, new_page, old_inode); @@ -953,21 +1055,13 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_i_links_write(new_dir, new_nlink > 0); up_write(&F2FS_I(new_dir)->i_sem); } - f2fs_mark_inode_dirty_sync(new_dir); + f2fs_mark_inode_dirty_sync(new_dir, false); f2fs_unlock_op(sbi); if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) f2fs_sync_fs(sbi->sb, 1); return 0; -out_undo: - /* - * Still we may fail to recover name info of f2fs_inode here - * Drop it, once its name is set as encrypted - */ - update_dent_inode(old_inode, old_inode, &old_dentry->d_name); -out_unlock: - f2fs_unlock_op(sbi); out_new_dir: if (new_dir_entry) { f2fs_dentry_kunmap(new_inode, new_dir_page); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 01177ecdeab8..fca87835a1da 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -19,10 +19,11 @@ #include "f2fs.h" #include "node.h" #include "segment.h" +#include "xattr.h" #include "trace.h" #include -#define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock) +#define on_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock) static struct kmem_cache *nat_entry_slab; static struct kmem_cache *free_nid_slab; @@ -45,8 +46,8 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) * give 25%, 25%, 50%, 50%, 50% memory for each components respectively */ if (type == FREE_NIDS) { - mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> - PAGE_SHIFT; + mem_size = (nm_i->nid_cnt[FREE_NID_LIST] * + sizeof(struct free_nid)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == NAT_ENTRIES) { mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> @@ -63,8 +64,9 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) int i; for (i = 0; i <= UPDATE_INO; i++) - mem_size += (sbi->im[i].ino_num * - sizeof(struct ino_entry)) >> PAGE_SHIFT; + mem_size += sbi->im[i].ino_num * + sizeof(struct ino_entry); + mem_size >>= PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); } else if (type == EXTENT_CACHE) { mem_size = (atomic_read(&sbi->total_ext_tree) * @@ -157,9 +159,6 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); struct nat_entry_set *head; - if (get_nat_flag(ne, IS_DIRTY)) - return; - head = radix_tree_lookup(&nm_i->nat_set_root, set); if (!head) { head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_NOFS); @@ -170,25 +169,27 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, head->entry_cnt = 0; f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head); } - list_move_tail(&ne->list, &head->entry_list); + + if (get_nat_flag(ne, IS_DIRTY)) + goto refresh_list; + nm_i->dirty_nat_cnt++; head->entry_cnt++; set_nat_flag(ne, IS_DIRTY, true); +refresh_list: + if (nat_get_blkaddr(ne) == NEW_ADDR) + list_del_init(&ne->list); + else + list_move_tail(&ne->list, &head->entry_list); } static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, - struct nat_entry *ne) + struct nat_entry_set *set, struct nat_entry *ne) { - nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); - struct nat_entry_set *head; - - head = radix_tree_lookup(&nm_i->nat_set_root, set); - if (head) { - list_move_tail(&ne->list, &nm_i->nat_entries); - set_nat_flag(ne, IS_DIRTY, false); - head->entry_cnt--; - nm_i->dirty_nat_cnt--; - } + list_move_tail(&ne->list, &nm_i->nat_entries); + set_nat_flag(ne, IS_DIRTY, false); + set->entry_cnt--; + nm_i->dirty_nat_cnt--; } static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, @@ -245,12 +246,24 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) return need_update; } -static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) +static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid, + bool no_fail) { struct nat_entry *new; - new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS); - f2fs_radix_tree_insert(&nm_i->nat_root, nid, new); + if (no_fail) { + new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS); + f2fs_radix_tree_insert(&nm_i->nat_root, nid, new); + } else { + new = kmem_cache_alloc(nat_entry_slab, GFP_NOFS); + if (!new) + return NULL; + if (radix_tree_insert(&nm_i->nat_root, nid, new)) { + kmem_cache_free(nat_entry_slab, new); + return NULL; + } + } + memset(new, 0, sizeof(struct nat_entry)); nat_set_nid(new, nid); nat_reset_flag(new); @@ -267,11 +280,13 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, e = __lookup_nat_cache(nm_i, nid); if (!e) { - e = grab_nat_entry(nm_i, nid); - node_info_from_raw_nat(&e->ni, ne); + e = grab_nat_entry(nm_i, nid, false); + if (e) + node_info_from_raw_nat(&e->ni, ne); } else { - f2fs_bug_on(sbi, nat_get_ino(e) != ne->ino || - nat_get_blkaddr(e) != ne->block_addr || + f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) || + nat_get_blkaddr(e) != + le32_to_cpu(ne->block_addr) || nat_get_version(e) != ne->version); } } @@ -285,7 +300,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid); if (!e) { - e = grab_nat_entry(nm_i, ni->nid); + e = grab_nat_entry(nm_i, ni->nid, true); copy_node_info(&e->ni, ni); f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); } else if (new_blkaddr == NEW_ADDR) { @@ -367,6 +382,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) struct page *page = NULL; struct f2fs_nat_entry ne; struct nat_entry *e; + pgoff_t index; int i; ni->nid = nid; @@ -392,17 +408,21 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) node_info_from_raw_nat(ni, &ne); } up_read(&curseg->journal_rwsem); - if (i >= 0) + if (i >= 0) { + up_read(&nm_i->nat_tree_lock); goto cache; + } /* Fill node_info from nat page */ - page = get_current_nat_page(sbi, start_nid); + index = current_nat_addr(sbi, nid); + up_read(&nm_i->nat_tree_lock); + + page = get_meta_page(sbi, index); nat_blk = (struct f2fs_nat_block *)page_address(page); ne = nat_blk->entries[nid - start_nid]; node_info_from_raw_nat(ni, &ne); f2fs_put_page(page, 1); cache: - up_read(&nm_i->nat_tree_lock); /* cache nat entry */ down_write(&nm_i->nat_tree_lock); cache_nat_entry(sbi, nid, &ne); @@ -535,7 +555,7 @@ static int get_node_path(struct inode *inode, long block, level = 3; goto got; } else { - BUG(); + return -E2BIG; } got: return level; @@ -559,6 +579,8 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) int err = 0; level = get_node_path(dn->inode, index, offset, noffset); + if (level < 0) + return level; nids[0] = dn->inode->i_ino; npage[0] = dn->inode_page; @@ -594,7 +616,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) } dn->nid = nids[i]; - npage[i] = new_node_page(dn, noffset[i], NULL); + npage[i] = new_node_page(dn, noffset[i]); if (IS_ERR(npage[i])) { alloc_nid_failed(sbi, nids[i]); err = PTR_ERR(npage[i]); @@ -635,7 +657,8 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) dn->nid = nids[level]; dn->ofs_in_node = offset[level]; dn->node_page = npage[level]; - dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node); + dn->data_blkaddr = datablock_addr(dn->inode, + dn->node_page, dn->ofs_in_node); return 0; release_pages: @@ -659,15 +682,11 @@ static void truncate_node(struct dnode_of_data *dn) struct node_info ni; get_node_info(sbi, dn->nid, &ni); - if (dn->inode->i_blocks == 0) { - f2fs_bug_on(sbi, ni.blk_addr != NULL_ADDR); - goto invalidate; - } f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); /* Deallocate node address */ invalidate_blocks(sbi, ni.blk_addr); - dec_valid_node_count(sbi, dn->inode); + dec_valid_node_count(sbi, dn->inode, dn->nid == dn->inode->i_ino); set_node_addr(sbi, &ni, NULL_ADDR, false); if (dn->nid == dn->inode->i_ino) { @@ -675,7 +694,7 @@ static void truncate_node(struct dnode_of_data *dn) dec_valid_inode_count(sbi); f2fs_inode_synced(dn->inode); } -invalidate: + clear_node_page_dirty(dn->node_page); set_sbi_flag(sbi, SBI_IS_DIRTY); @@ -861,6 +880,8 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from) trace_f2fs_truncate_inode_blocks_enter(inode, from); level = get_node_path(inode, from, offset, noffset); + if (level < 0) + return level; page = get_node_page(sbi, inode->i_ino); if (IS_ERR(page)) { @@ -957,9 +978,6 @@ int truncate_xattr_node(struct inode *inode, struct page *page) f2fs_i_xnid_write(inode, 0); - /* need to do checkpoint during fsync */ - F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); - set_new_dnode(&dn, inode, page, npage, nid); if (page) @@ -995,7 +1013,7 @@ int remove_inode_page(struct inode *inode) /* 0 is possible, after f2fs_new_inode() has failed */ f2fs_bug_on(F2FS_I_SB(inode), - inode->i_blocks != 0 && inode->i_blocks != 1); + inode->i_blocks != 0 && inode->i_blocks != 8); /* will put inode & node pages */ truncate_node(&dn); @@ -1010,14 +1028,13 @@ struct page *new_inode_page(struct inode *inode) set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); /* caller should f2fs_put_page(page, 1); */ - return new_node_page(&dn, 0, NULL); + return new_node_page(&dn, 0); } -struct page *new_node_page(struct dnode_of_data *dn, - unsigned int ofs, struct page *ipage) +struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); - struct node_info old_ni, new_ni; + struct node_info new_ni; struct page *page; int err; @@ -1028,17 +1045,18 @@ struct page *new_node_page(struct dnode_of_data *dn, if (!page) return ERR_PTR(-ENOMEM); - if (unlikely(!inc_valid_node_count(sbi, dn->inode))) { - err = -ENOSPC; + if (unlikely((err = inc_valid_node_count(sbi, dn->inode, !ofs)))) goto fail; - } - get_node_info(sbi, dn->nid, &old_ni); - - /* Reinitialize old_ni with new node page */ - f2fs_bug_on(sbi, old_ni.blk_addr != NULL_ADDR); - new_ni = old_ni; +#ifdef CONFIG_F2FS_CHECK_FS + get_node_info(sbi, dn->nid, &new_ni); + f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR); +#endif + new_ni.nid = dn->nid; new_ni.ino = dn->inode->i_ino; + new_ni.blk_addr = NULL_ADDR; + new_ni.flag = 0; + new_ni.version = 0; set_node_addr(sbi, &new_ni, NEW_ADDR, false); f2fs_wait_on_page_writeback(page, NODE, true); @@ -1134,11 +1152,12 @@ repeat: if (!page) return ERR_PTR(-ENOMEM); - err = read_node_page(page, READ_SYNC); + err = read_node_page(page, 0); if (err < 0) { f2fs_put_page(page, 1); return ERR_PTR(err); } else if (err == LOCKED_PAGE) { + err = 0; goto page_hit; } @@ -1152,15 +1171,27 @@ repeat: goto repeat; } - if (unlikely(!PageUptodate(page))) + if (unlikely(!PageUptodate(page))) { + err = -EIO; goto out_err; + } + + if (!f2fs_inode_chksum_verify(sbi, page)) { + err = -EBADMSG; + goto out_err; + } page_hit: if(unlikely(nid != nid_of_node(page))) { - f2fs_bug_on(sbi, 1); - ClearPageUptodate(page); + f2fs_msg(sbi->sb, KERN_WARNING, "inconsistent node block, " + "nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", + nid, nid_of_node(page), ino_of_node(page), + ofs_of_node(page), cpver_of_node(page), + next_blkaddr_of_node(page)); + err = -EINVAL; out_err: + ClearPageUptodate(page); f2fs_put_page(page, 1); - return ERR_PTR(-EIO); + return ERR_PTR(err); } return page; } @@ -1204,6 +1235,7 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) ret = f2fs_write_inline_data(inode, page); inode_dec_dirty_pages(inode); + remove_dirty_inode(inode); if (ret) set_page_dirty(page); page_out: @@ -1303,16 +1335,103 @@ continue_unlock: return last_page; } +static int __write_node_page(struct page *page, bool atomic, bool *submitted, + struct writeback_control *wbc, bool do_balance, + enum iostat_type io_type) +{ + struct f2fs_sb_info *sbi = F2FS_P_SB(page); + nid_t nid; + struct node_info ni; + struct f2fs_io_info fio = { + .sbi = sbi, + .type = NODE, + .op = REQ_OP_WRITE, + .op_flags = wbc_to_write_flags(wbc), + .page = page, + .encrypted_page = NULL, + .submitted = false, + .io_type = io_type, + }; + + trace_f2fs_writepage(page, NODE); + + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + goto redirty_out; + if (unlikely(f2fs_cp_error(sbi))) + goto redirty_out; + + /* get old block addr of this node page */ + nid = nid_of_node(page); + f2fs_bug_on(sbi, page->index != nid); + + if (wbc->for_reclaim) { + if (!down_read_trylock(&sbi->node_write)) + goto redirty_out; + } else { + down_read(&sbi->node_write); + } + + get_node_info(sbi, nid, &ni); + + /* This page is already truncated */ + if (unlikely(ni.blk_addr == NULL_ADDR)) { + ClearPageUptodate(page); + dec_page_count(sbi, F2FS_DIRTY_NODES); + up_read(&sbi->node_write); + unlock_page(page); + return 0; + } + + if (atomic && !test_opt(sbi, NOBARRIER)) + fio.op_flags |= REQ_PREFLUSH | REQ_FUA; + + set_page_writeback(page); + fio.old_blkaddr = ni.blk_addr; + write_node_page(nid, &fio); + set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page)); + dec_page_count(sbi, F2FS_DIRTY_NODES); + up_read(&sbi->node_write); + + if (wbc->for_reclaim) { + f2fs_submit_merged_write_cond(sbi, page->mapping->host, 0, + page->index, NODE); + submitted = NULL; + } + + unlock_page(page); + + if (unlikely(f2fs_cp_error(sbi))) { + f2fs_submit_merged_write(sbi, NODE); + submitted = NULL; + } + if (submitted) + *submitted = fio.submitted; + + if (do_balance) + f2fs_balance_fs(sbi, false); + return 0; + +redirty_out: + redirty_page_for_writepage(wbc, page); + return AOP_WRITEPAGE_ACTIVATE; +} + +static int f2fs_write_node_page(struct page *page, + struct writeback_control *wbc) +{ + return __write_node_page(page, false, NULL, wbc, false, FS_NODE_IO); +} + int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, struct writeback_control *wbc, bool atomic) { pgoff_t index, end; + pgoff_t last_idx = ULONG_MAX; struct pagevec pvec; int ret = 0; struct page *last_page = NULL; bool marked = false; nid_t ino = inode->i_ino; - int nwritten = 0; if (atomic) { last_page = last_fsync_dnode(sbi, ino); @@ -1334,11 +1453,13 @@ retry: for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; + bool submitted = false; if (unlikely(f2fs_cp_error(sbi))) { f2fs_put_page(last_page, 0); pagevec_release(&pvec); - return -EIO; + ret = -EIO; + goto out; } if (!IS_DNODE(page) || !is_cold_node(page)) @@ -1364,6 +1485,9 @@ continue_unlock: f2fs_wait_on_page_writeback(page, NODE, true); BUG_ON(PageWriteback(page)); + set_fsync_mark(page, 0); + set_dentry_mark(page, 0); + if (!atomic || page == last_page) { set_fsync_mark(page, 1); if (IS_INODE(page)) { @@ -1381,13 +1505,16 @@ continue_unlock: if (!clear_page_dirty_for_io(page)) goto continue_unlock; - ret = NODE_MAPPING(sbi)->a_ops->writepage(page, wbc); + ret = __write_node_page(page, atomic && + page == last_page, + &submitted, wbc, true, + FS_NODE_IO); if (ret) { unlock_page(page); f2fs_put_page(last_page, 0); break; - } else { - nwritten++; + } else if (submitted) { + last_idx = page->index; } if (page == last_page) { @@ -1407,17 +1534,19 @@ continue_unlock: "Retry to write fsync mark: ino=%u, idx=%lx", ino, last_page->index); lock_page(last_page); + f2fs_wait_on_page_writeback(last_page, NODE, true); set_page_dirty(last_page); unlock_page(last_page); goto retry; } - - if (nwritten) - f2fs_submit_merged_bio_cond(sbi, NULL, NULL, ino, NODE, WRITE); +out: + if (last_idx != ULONG_MAX) + f2fs_submit_merged_write_cond(sbi, NULL, ino, last_idx, NODE); return ret ? -EIO: 0; } -int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc) +int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc, + bool do_balance, enum iostat_type io_type) { pgoff_t index, end; struct pagevec pvec; @@ -1441,6 +1570,7 @@ next_step: for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; + bool submitted = false; if (unlikely(f2fs_cp_error(sbi))) { pagevec_release(&pvec); @@ -1494,9 +1624,11 @@ continue_unlock: set_fsync_mark(page, 0); set_dentry_mark(page, 0); - if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc)) + ret = __write_node_page(page, false, &submitted, + wbc, do_balance, io_type); + if (ret) unlock_page(page); - else + else if (submitted) nwritten++; if (--wbc->nr_to_write == 0) @@ -1517,7 +1649,7 @@ continue_unlock: } out: if (nwritten) - f2fs_submit_merged_bio(sbi, NODE, WRITE); + f2fs_submit_merged_write(sbi, NODE); return ret; } @@ -1560,72 +1692,6 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) return ret; } -static int f2fs_write_node_page(struct page *page, - struct writeback_control *wbc) -{ - struct f2fs_sb_info *sbi = F2FS_P_SB(page); - nid_t nid; - struct node_info ni; - struct f2fs_io_info fio = { - .sbi = sbi, - .type = NODE, - .op = REQ_OP_WRITE, - .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0, - .page = page, - .encrypted_page = NULL, - }; - - trace_f2fs_writepage(page, NODE); - - if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) - goto redirty_out; - if (unlikely(f2fs_cp_error(sbi))) - goto redirty_out; - - /* get old block addr of this node page */ - nid = nid_of_node(page); - f2fs_bug_on(sbi, page->index != nid); - - if (wbc->for_reclaim) { - if (!down_read_trylock(&sbi->node_write)) - goto redirty_out; - } else { - down_read(&sbi->node_write); - } - - get_node_info(sbi, nid, &ni); - - /* This page is already truncated */ - if (unlikely(ni.blk_addr == NULL_ADDR)) { - ClearPageUptodate(page); - dec_page_count(sbi, F2FS_DIRTY_NODES); - up_read(&sbi->node_write); - unlock_page(page); - return 0; - } - - set_page_writeback(page); - fio.old_blkaddr = ni.blk_addr; - write_node_page(nid, &fio); - set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page)); - dec_page_count(sbi, F2FS_DIRTY_NODES); - up_read(&sbi->node_write); - - if (wbc->for_reclaim) - f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, NODE, WRITE); - - unlock_page(page); - - if (unlikely(f2fs_cp_error(sbi))) - f2fs_submit_merged_bio(sbi, NODE, WRITE); - - return 0; - -redirty_out: - redirty_page_for_writepage(wbc, page); - return AOP_WRITEPAGE_ACTIVATE; -} - static int f2fs_write_node_pages(struct address_space *mapping, struct writeback_control *wbc) { @@ -1633,6 +1699,9 @@ static int f2fs_write_node_pages(struct address_space *mapping, struct blk_plug plug; long diff; + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + goto skip_write; + /* balancing f2fs's metadata in background */ f2fs_balance_fs_bg(sbi); @@ -1645,7 +1714,7 @@ static int f2fs_write_node_pages(struct address_space *mapping, diff = nr_pages_to_write(sbi, NODE, wbc); wbc->sync_mode = WB_SYNC_NONE; blk_start_plug(&plug); - sync_node_pages(sbi, wbc); + sync_node_pages(sbi, wbc, true, FS_NODE_IO); blk_finish_plug(&plug); wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); return 0; @@ -1692,100 +1761,220 @@ static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i, return radix_tree_lookup(&nm_i->free_nid_root, n); } -static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i, - struct free_nid *i) -{ - list_del(&i->list); - radix_tree_delete(&nm_i->free_nid_root, i->nid); -} - -static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) +static int __insert_nid_to_list(struct f2fs_sb_info *sbi, + struct free_nid *i, enum nid_list list, bool new) { struct f2fs_nm_info *nm_i = NM_I(sbi); - struct free_nid *i; - struct nat_entry *ne; - if (!available_free_memory(sbi, FREE_NIDS)) - return -1; + if (new) { + int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i); + if (err) + return err; + } + + f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW : + i->state != NID_ALLOC); + nm_i->nid_cnt[list]++; + list_add_tail(&i->list, &nm_i->nid_list[list]); + return 0; +} + +static void __remove_nid_from_list(struct f2fs_sb_info *sbi, + struct free_nid *i, enum nid_list list, bool reuse) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + + f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW : + i->state != NID_ALLOC); + nm_i->nid_cnt[list]--; + list_del(&i->list); + if (!reuse) + radix_tree_delete(&nm_i->free_nid_root, i->nid); +} + +/* return if the nid is recognized as free */ +static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct free_nid *i, *e; + struct nat_entry *ne; + int err = -EINVAL; + bool ret = false; /* 0 nid should not be used */ if (unlikely(nid == 0)) - return 0; - - if (build) { - /* do not add allocated nids */ - ne = __lookup_nat_cache(nm_i, nid); - if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || - nat_get_blkaddr(ne) != NULL_ADDR)) - return 0; - } + return false; i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS); i->nid = nid; i->state = NID_NEW; - if (radix_tree_preload(GFP_NOFS)) { - kmem_cache_free(free_nid_slab, i); - return 0; - } + if (radix_tree_preload(GFP_NOFS)) + goto err; - spin_lock(&nm_i->free_nid_list_lock); - if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) { - spin_unlock(&nm_i->free_nid_list_lock); - radix_tree_preload_end(); - kmem_cache_free(free_nid_slab, i); - return 0; + spin_lock(&nm_i->nid_list_lock); + + if (build) { + /* + * Thread A Thread B + * - f2fs_create + * - f2fs_new_inode + * - alloc_nid + * - __insert_nid_to_list(ALLOC_NID_LIST) + * - f2fs_balance_fs_bg + * - build_free_nids + * - __build_free_nids + * - scan_nat_page + * - add_free_nid + * - __lookup_nat_cache + * - f2fs_add_link + * - init_inode_metadata + * - new_inode_page + * - new_node_page + * - set_node_addr + * - alloc_nid_done + * - __remove_nid_from_list(ALLOC_NID_LIST) + * - __insert_nid_to_list(FREE_NID_LIST) + */ + ne = __lookup_nat_cache(nm_i, nid); + if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || + nat_get_blkaddr(ne) != NULL_ADDR)) + goto err_out; + + e = __lookup_free_nid_list(nm_i, nid); + if (e) { + if (e->state == NID_NEW) + ret = true; + goto err_out; + } } - list_add_tail(&i->list, &nm_i->free_nid_list); - nm_i->fcnt++; - spin_unlock(&nm_i->free_nid_list_lock); + ret = true; + err = __insert_nid_to_list(sbi, i, FREE_NID_LIST, true); +err_out: + spin_unlock(&nm_i->nid_list_lock); radix_tree_preload_end(); - return 1; +err: + if (err) + kmem_cache_free(free_nid_slab, i); + return ret; } -static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) +static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) { + struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i; bool need_free = false; - spin_lock(&nm_i->free_nid_list_lock); + spin_lock(&nm_i->nid_list_lock); i = __lookup_free_nid_list(nm_i, nid); if (i && i->state == NID_NEW) { - __del_from_free_nid_list(nm_i, i); - nm_i->fcnt--; + __remove_nid_from_list(sbi, i, FREE_NID_LIST, false); need_free = true; } - spin_unlock(&nm_i->free_nid_list_lock); + spin_unlock(&nm_i->nid_list_lock); if (need_free) kmem_cache_free(free_nid_slab, i); } +static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, + bool set, bool build) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid); + unsigned int nid_ofs = nid - START_NID(nid); + + if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap)) + return; + + if (set) + __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); + else + __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); + + if (set) + nm_i->free_nid_count[nat_ofs]++; + else if (!build) + nm_i->free_nid_count[nat_ofs]--; +} + static void scan_nat_page(struct f2fs_sb_info *sbi, struct page *nat_page, nid_t start_nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct f2fs_nat_block *nat_blk = page_address(nat_page); block_t blk_addr; + unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid); int i; + if (test_bit_le(nat_ofs, nm_i->nat_block_bitmap)) + return; + + __set_bit_le(nat_ofs, nm_i->nat_block_bitmap); + i = start_nid % NAT_ENTRY_PER_BLOCK; for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) { + bool freed = false; if (unlikely(start_nid >= nm_i->max_nid)) break; blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); f2fs_bug_on(sbi, blk_addr == NEW_ADDR); - if (blk_addr == NULL_ADDR) { - if (add_free_nid(sbi, start_nid, true) < 0) - break; - } + if (blk_addr == NULL_ADDR) + freed = add_free_nid(sbi, start_nid, true); + spin_lock(&NM_I(sbi)->nid_list_lock); + update_free_nid_bitmap(sbi, start_nid, freed, true); + spin_unlock(&NM_I(sbi)->nid_list_lock); } } -void build_free_nids(struct f2fs_sb_info *sbi) +static void scan_free_nid_bits(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); + struct f2fs_journal *journal = curseg->journal; + unsigned int i, idx; + + down_read(&nm_i->nat_tree_lock); + + for (i = 0; i < nm_i->nat_blocks; i++) { + if (!test_bit_le(i, nm_i->nat_block_bitmap)) + continue; + if (!nm_i->free_nid_count[i]) + continue; + for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) { + nid_t nid; + + if (!test_bit_le(idx, nm_i->free_nid_bitmap[i])) + continue; + + nid = i * NAT_ENTRY_PER_BLOCK + idx; + add_free_nid(sbi, nid, true); + + if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS) + goto out; + } + } +out: + down_read(&curseg->journal_rwsem); + for (i = 0; i < nats_in_cursum(journal); i++) { + block_t addr; + nid_t nid; + + addr = le32_to_cpu(nat_in_journal(journal, i).block_addr); + nid = le32_to_cpu(nid_in_journal(journal, i)); + if (addr == NULL_ADDR) + add_free_nid(sbi, nid, true); + else + remove_free_nid(sbi, nid); + } + up_read(&curseg->journal_rwsem); + up_read(&nm_i->nat_tree_lock); +} + +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -1793,10 +1982,24 @@ void build_free_nids(struct f2fs_sb_info *sbi) int i = 0; nid_t nid = nm_i->next_scan_nid; + if (unlikely(nid >= nm_i->max_nid)) + nid = 0; + /* Enough entries */ - if (nm_i->fcnt >= NAT_ENTRY_PER_BLOCK) + if (nm_i->nid_cnt[FREE_NID_LIST] >= NAT_ENTRY_PER_BLOCK) return; + if (!sync && !available_free_memory(sbi, FREE_NIDS)) + return; + + if (!mount) { + /* try to find free nids in free_nid_bitmap */ + scan_free_nid_bits(sbi); + + if (nm_i->nid_cnt[FREE_NID_LIST]) + return; + } + /* readahead nat pages to be scanned */ ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT, true); @@ -1830,7 +2033,7 @@ void build_free_nids(struct f2fs_sb_info *sbi) if (addr == NULL_ADDR) add_free_nid(sbi, nid, true); else - remove_free_nid(nm_i, nid); + remove_free_nid(sbi, nid); } up_read(&curseg->journal_rwsem); up_read(&nm_i->nat_tree_lock); @@ -1839,6 +2042,13 @@ void build_free_nids(struct f2fs_sb_info *sbi) nm_i->ra_nid_pages, META_NAT, false); } +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) +{ + mutex_lock(&NM_I(sbi)->build_lock); + __build_free_nids(sbi, sync, mount); + mutex_unlock(&NM_I(sbi)->build_lock); +} + /* * If this function returns success, caller can obtain a new nid * from second parameter of this function. @@ -1850,34 +2060,39 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) struct free_nid *i = NULL; retry: #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(sbi, FAULT_ALLOC_NID)) + if (time_to_inject(sbi, FAULT_ALLOC_NID)) { + f2fs_show_injection_info(FAULT_ALLOC_NID); return false; + } #endif - if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids)) - return false; + spin_lock(&nm_i->nid_list_lock); - spin_lock(&nm_i->free_nid_list_lock); + if (unlikely(nm_i->available_nids == 0)) { + spin_unlock(&nm_i->nid_list_lock); + return false; + } /* We should not use stale free nids created by build_free_nids */ - if (nm_i->fcnt && !on_build_free_nids(nm_i)) { - f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list)); - list_for_each_entry(i, &nm_i->free_nid_list, list) - if (i->state == NID_NEW) - break; - - f2fs_bug_on(sbi, i->state != NID_NEW); + if (nm_i->nid_cnt[FREE_NID_LIST] && !on_build_free_nids(nm_i)) { + f2fs_bug_on(sbi, list_empty(&nm_i->nid_list[FREE_NID_LIST])); + i = list_first_entry(&nm_i->nid_list[FREE_NID_LIST], + struct free_nid, list); *nid = i->nid; + + __remove_nid_from_list(sbi, i, FREE_NID_LIST, true); i->state = NID_ALLOC; - nm_i->fcnt--; - spin_unlock(&nm_i->free_nid_list_lock); + __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false); + nm_i->available_nids--; + + update_free_nid_bitmap(sbi, *nid, false, false); + + spin_unlock(&nm_i->nid_list_lock); return true; } - spin_unlock(&nm_i->free_nid_list_lock); + spin_unlock(&nm_i->nid_list_lock); /* Let's scan nat pages and its caches to get free nids */ - mutex_lock(&nm_i->build_lock); - build_free_nids(sbi); - mutex_unlock(&nm_i->build_lock); + build_free_nids(sbi, true, false); goto retry; } @@ -1889,11 +2104,11 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i; - spin_lock(&nm_i->free_nid_list_lock); + spin_lock(&nm_i->nid_list_lock); i = __lookup_free_nid_list(nm_i, nid); - f2fs_bug_on(sbi, !i || i->state != NID_ALLOC); - __del_from_free_nid_list(nm_i, i); - spin_unlock(&nm_i->free_nid_list_lock); + f2fs_bug_on(sbi, !i); + __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false); + spin_unlock(&nm_i->nid_list_lock); kmem_cache_free(free_nid_slab, i); } @@ -1910,17 +2125,24 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) if (!nid) return; - spin_lock(&nm_i->free_nid_list_lock); + spin_lock(&nm_i->nid_list_lock); i = __lookup_free_nid_list(nm_i, nid); - f2fs_bug_on(sbi, !i || i->state != NID_ALLOC); + f2fs_bug_on(sbi, !i); + if (!available_free_memory(sbi, FREE_NIDS)) { - __del_from_free_nid_list(nm_i, i); + __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false); need_free = true; } else { + __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, true); i->state = NID_NEW; - nm_i->fcnt++; + __insert_nid_to_list(sbi, i, FREE_NID_LIST, false); } - spin_unlock(&nm_i->free_nid_list_lock); + + nm_i->available_nids++; + + update_free_nid_bitmap(sbi, nid, true, false); + + spin_unlock(&nm_i->nid_list_lock); if (need_free) kmem_cache_free(free_nid_slab, i); @@ -1932,24 +2154,24 @@ int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) struct free_nid *i, *next; int nr = nr_shrink; - if (nm_i->fcnt <= MAX_FREE_NIDS) + if (nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS) return 0; if (!mutex_trylock(&nm_i->build_lock)) return 0; - spin_lock(&nm_i->free_nid_list_lock); - list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) { - if (nr_shrink <= 0 || nm_i->fcnt <= MAX_FREE_NIDS) + spin_lock(&nm_i->nid_list_lock); + list_for_each_entry_safe(i, next, &nm_i->nid_list[FREE_NID_LIST], + list) { + if (nr_shrink <= 0 || + nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS) break; - if (i->state == NID_ALLOC) - continue; - __del_from_free_nid_list(nm_i, i); + + __remove_nid_from_list(sbi, i, FREE_NID_LIST, false); kmem_cache_free(free_nid_slab, i); - nm_i->fcnt--; nr_shrink--; } - spin_unlock(&nm_i->free_nid_list_lock); + spin_unlock(&nm_i->nid_list_lock); mutex_unlock(&nm_i->build_lock); return nr - nr_shrink; @@ -1982,38 +2204,47 @@ update_inode: f2fs_put_page(ipage, 1); } -void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) +int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; - nid_t new_xnid = nid_of_node(page); + nid_t new_xnid; + struct dnode_of_data dn; struct node_info ni; + struct page *xpage; - /* 1: invalidate the previous xattr nid */ if (!prev_xnid) goto recover_xnid; - /* Deallocate node address */ + /* 1: invalidate the previous xattr nid */ get_node_info(sbi, prev_xnid, &ni); f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); invalidate_blocks(sbi, ni.blk_addr); - dec_valid_node_count(sbi, inode); + dec_valid_node_count(sbi, inode, false); set_node_addr(sbi, &ni, NULL_ADDR, false); recover_xnid: - /* 2: allocate new xattr nid */ - if (unlikely(!inc_valid_node_count(sbi, inode))) - f2fs_bug_on(sbi, 1); + /* 2: update xattr nid in inode */ + if (!alloc_nid(sbi, &new_xnid)) + return -ENOSPC; - remove_free_nid(NM_I(sbi), new_xnid); - get_node_info(sbi, new_xnid, &ni); - ni.ino = inode->i_ino; - set_node_addr(sbi, &ni, NEW_ADDR, false); - f2fs_i_xnid_write(inode, new_xnid); + set_new_dnode(&dn, inode, NULL, NULL, new_xnid); + xpage = new_node_page(&dn, XATTR_NODE_OFFSET); + if (IS_ERR(xpage)) { + alloc_nid_failed(sbi, new_xnid); + return PTR_ERR(xpage); + } - /* 3: update xattr blkaddr */ - refresh_sit_entry(sbi, NEW_ADDR, blkaddr); - set_node_addr(sbi, &ni, blkaddr, false); + alloc_nid_done(sbi, new_xnid); + update_inode_page(inode); + + /* 3: update and set xattr node page dirty */ + memcpy(F2FS_NODE(xpage), F2FS_NODE(page), VALID_XATTR_BLOCK_SIZE); + + set_page_dirty(xpage); + f2fs_put_page(xpage, 1); + + return 0; } int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) @@ -2035,7 +2266,7 @@ retry: } /* Should not use this inode from free nid list */ - remove_free_nid(NM_I(sbi), ino); + remove_free_nid(sbi, ino); if (!PageUptodate(ipage)) SetPageUptodate(ipage); @@ -2049,12 +2280,19 @@ retry: dst->i_blocks = cpu_to_le64(1); dst->i_links = cpu_to_le32(1); dst->i_xattr_nid = 0; - dst->i_inline = src->i_inline & F2FS_INLINE_XATTR; + dst->i_inline = src->i_inline & (F2FS_INLINE_XATTR | F2FS_EXTRA_ATTR); + if (dst->i_inline & F2FS_EXTRA_ATTR) { + dst->i_extra_isize = src->i_extra_isize; + if (f2fs_sb_has_project_quota(sbi->sb) && + F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), + i_projid)) + dst->i_projid = src->i_projid; + } new_ni = old_ni; new_ni.ino = ino; - if (unlikely(!inc_valid_node_count(sbi, NULL))) + if (unlikely(inc_valid_node_count(sbi, NULL, true))) WARN_ON(1); set_node_addr(sbi, &new_ni, NEW_ADDR, false); inc_valid_inode_count(sbi); @@ -2069,7 +2307,6 @@ int restore_node_summary(struct f2fs_sb_info *sbi, struct f2fs_node *rn; struct f2fs_summary *sum_entry; block_t addr; - int bio_blocks = MAX_BIO_BLOCKS(sbi); int i, idx, last_offset, nrpages; /* scan the node segment */ @@ -2078,7 +2315,7 @@ int restore_node_summary(struct f2fs_sb_info *sbi, sum_entry = &sum->entries[0]; for (i = 0; i < last_offset; i += nrpages, addr += nrpages) { - nrpages = min(last_offset - i, bio_blocks); + nrpages = min(last_offset - i, BIO_MAX_PAGES); /* readahead node pages */ ra_meta_pages(sbi, addr, nrpages, META_POR, true); @@ -2117,9 +2354,22 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) ne = __lookup_nat_cache(nm_i, nid); if (!ne) { - ne = grab_nat_entry(nm_i, nid); + ne = grab_nat_entry(nm_i, nid, true); node_info_from_raw_nat(&ne->ni, &raw_ne); } + + /* + * if a free nat in journal has not been used after last + * checkpoint, we should remove it from available nids, + * since later we will add it again. + */ + if (!get_nat_flag(ne, IS_DIRTY) && + le32_to_cpu(raw_ne.block_addr) == NULL_ADDR) { + spin_lock(&nm_i->nid_list_lock); + nm_i->available_nids--; + spin_unlock(&nm_i->nid_list_lock); + } + __set_nat_cache_dirty(nm_i, ne); } update_nats_in_cursum(journal, -i); @@ -2144,8 +2394,39 @@ add_out: list_add_tail(&nes->set_list, head); } +static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, + struct page *page) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK; + struct f2fs_nat_block *nat_blk = page_address(page); + int valid = 0; + int i; + + if (!enabled_nat_bits(sbi, NULL)) + return; + + for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) { + if (start_nid == 0 && i == 0) + valid++; + if (nat_blk->entries[i].block_addr) + valid++; + } + if (valid == 0) { + __set_bit_le(nat_index, nm_i->empty_nat_bits); + __clear_bit_le(nat_index, nm_i->full_nat_bits); + return; + } + + __clear_bit_le(nat_index, nm_i->empty_nat_bits); + if (valid == NAT_ENTRY_PER_BLOCK) + __set_bit_le(nat_index, nm_i->full_nat_bits); + else + __clear_bit_le(nat_index, nm_i->full_nat_bits); +} + static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, - struct nat_entry_set *set) + struct nat_entry_set *set, struct cp_control *cpc) { struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); struct f2fs_journal *journal = curseg->journal; @@ -2160,7 +2441,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, * #1, flush nat entries to journal in current hot data summary block. * #2, flush nat entries to nat page. */ - if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL)) + if (enabled_nat_bits(sbi, cpc) || + !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL)) to_journal = false; if (to_journal) { @@ -2177,8 +2459,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, nid_t nid = nat_get_nid(ne); int offset; - if (nat_get_blkaddr(ne) == NEW_ADDR) - continue; + f2fs_bug_on(sbi, nat_get_blkaddr(ne) == NEW_ADDR); if (to_journal) { offset = lookup_journal_in_cursum(journal, @@ -2191,26 +2472,38 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, } raw_nat_from_node_info(raw_ne, &ne->ni); nat_reset_flag(ne); - __clear_nat_cache_dirty(NM_I(sbi), ne); - if (nat_get_blkaddr(ne) == NULL_ADDR) + __clear_nat_cache_dirty(NM_I(sbi), set, ne); + if (nat_get_blkaddr(ne) == NULL_ADDR) { add_free_nid(sbi, nid, false); + spin_lock(&NM_I(sbi)->nid_list_lock); + NM_I(sbi)->available_nids++; + update_free_nid_bitmap(sbi, nid, true, false); + spin_unlock(&NM_I(sbi)->nid_list_lock); + } else { + spin_lock(&NM_I(sbi)->nid_list_lock); + update_free_nid_bitmap(sbi, nid, false, false); + spin_unlock(&NM_I(sbi)->nid_list_lock); + } } - if (to_journal) + if (to_journal) { up_write(&curseg->journal_rwsem); - else + } else { + __update_nat_bits(sbi, start_nid, page); f2fs_put_page(page, 1); + } - f2fs_bug_on(sbi, set->entry_cnt); - - radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); - kmem_cache_free(nat_entry_set_slab, set); + /* Allow dirty nats by node block allocation in write_begin */ + if (!set->entry_cnt) { + radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); + kmem_cache_free(nat_entry_set_slab, set); + } } /* * This function is called during the checkpointing process. */ -void flush_nat_entries(struct f2fs_sb_info *sbi) +void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -2231,7 +2524,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) * entries, remove all entries from journal and merge them * into nat entry set. */ - if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL)) + if (enabled_nat_bits(sbi, cpc) || + !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL)) remove_nats_in_journal(sbi); while ((found = __gang_lookup_nat_set(nm_i, @@ -2245,11 +2539,86 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) /* flush dirty nats in nat entry set */ list_for_each_entry_safe(set, tmp, &sets, set_list) - __flush_nat_entry_set(sbi, set); + __flush_nat_entry_set(sbi, set, cpc); up_write(&nm_i->nat_tree_lock); + /* Allow dirty nats by node block allocation in write_begin */ +} - f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); +static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct f2fs_nm_info *nm_i = NM_I(sbi); + unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE; + unsigned int i; + __u64 cp_ver = cur_cp_version(ckpt); + block_t nat_bits_addr; + + if (!enabled_nat_bits(sbi, NULL)) + return 0; + + nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 + + F2FS_BLKSIZE - 1); + nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, + GFP_KERNEL); + if (!nm_i->nat_bits) + return -ENOMEM; + + nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg - + nm_i->nat_bits_blocks; + for (i = 0; i < nm_i->nat_bits_blocks; i++) { + struct page *page = get_meta_page(sbi, nat_bits_addr++); + + memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS), + page_address(page), F2FS_BLKSIZE); + f2fs_put_page(page, 1); + } + + cp_ver |= (cur_cp_crc(ckpt) << 32); + if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) { + disable_nat_bits(sbi, true); + return 0; + } + + nm_i->full_nat_bits = nm_i->nat_bits + 8; + nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes; + + f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint"); + return 0; +} + +static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + unsigned int i = 0; + nid_t nid, last_nid; + + if (!enabled_nat_bits(sbi, NULL)) + return; + + for (i = 0; i < nm_i->nat_blocks; i++) { + i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i); + if (i >= nm_i->nat_blocks) + break; + + __set_bit_le(i, nm_i->nat_block_bitmap); + + nid = i * NAT_ENTRY_PER_BLOCK; + last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK; + + spin_lock(&NM_I(sbi)->nid_list_lock); + for (; nid < last_nid; nid++) + update_free_nid_bitmap(sbi, nid, true, true); + spin_unlock(&NM_I(sbi)->nid_list_lock); + } + + for (i = 0; i < nm_i->nat_blocks; i++) { + i = find_next_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i); + if (i >= nm_i->nat_blocks) + break; + + __set_bit_le(i, nm_i->nat_block_bitmap); + } } static int init_node_manager(struct f2fs_sb_info *sbi) @@ -2257,32 +2626,35 @@ static int init_node_manager(struct f2fs_sb_info *sbi) struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi); struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned char *version_bitmap; - unsigned int nat_segs, nat_blocks; + unsigned int nat_segs; + int err; nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr); /* segment_count_nat includes pair segment so divide to 2. */ nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1; - nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); - - nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; + nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); + nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks; /* not used nids: 0, node, meta, (and root counted as valid node) */ - nm_i->available_nids = nm_i->max_nid - F2FS_RESERVED_NODE_NUM; - nm_i->fcnt = 0; + nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count - + F2FS_RESERVED_NODE_NUM; + nm_i->nid_cnt[FREE_NID_LIST] = 0; + nm_i->nid_cnt[ALLOC_NID_LIST] = 0; nm_i->nat_cnt = 0; nm_i->ram_thresh = DEF_RAM_THRESHOLD; nm_i->ra_nid_pages = DEF_RA_NID_PAGES; nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD; INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); - INIT_LIST_HEAD(&nm_i->free_nid_list); + INIT_LIST_HEAD(&nm_i->nid_list[FREE_NID_LIST]); + INIT_LIST_HEAD(&nm_i->nid_list[ALLOC_NID_LIST]); INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO); INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO); INIT_LIST_HEAD(&nm_i->nat_entries); mutex_init(&nm_i->build_lock); - spin_lock_init(&nm_i->free_nid_list_lock); + spin_lock_init(&nm_i->nid_list_lock); init_rwsem(&nm_i->nat_tree_lock); nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); @@ -2295,6 +2667,39 @@ static int init_node_manager(struct f2fs_sb_info *sbi) GFP_KERNEL); if (!nm_i->nat_bitmap) return -ENOMEM; + + err = __get_nat_bitmaps(sbi); + if (err) + return err; + +#ifdef CONFIG_F2FS_CHECK_FS + nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size, + GFP_KERNEL); + if (!nm_i->nat_bitmap_mir) + return -ENOMEM; +#endif + + return 0; +} + +static int init_free_nid_cache(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + + nm_i->free_nid_bitmap = kvzalloc(nm_i->nat_blocks * + NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL); + if (!nm_i->free_nid_bitmap) + return -ENOMEM; + + nm_i->nat_block_bitmap = kvzalloc(nm_i->nat_blocks / 8, + GFP_KERNEL); + if (!nm_i->nat_block_bitmap) + return -ENOMEM; + + nm_i->free_nid_count = kvzalloc(nm_i->nat_blocks * + sizeof(unsigned short), GFP_KERNEL); + if (!nm_i->free_nid_count) + return -ENOMEM; return 0; } @@ -2310,7 +2715,14 @@ int build_node_manager(struct f2fs_sb_info *sbi) if (err) return err; - build_free_nids(sbi); + err = init_free_nid_cache(sbi); + if (err) + return err; + + /* load free nid status from nat_bits table */ + load_free_nid_bitmap(sbi); + + build_free_nids(sbi, true, true); return 0; } @@ -2327,17 +2739,18 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) return; /* destroy free nid list */ - spin_lock(&nm_i->free_nid_list_lock); - list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { - f2fs_bug_on(sbi, i->state == NID_ALLOC); - __del_from_free_nid_list(nm_i, i); - nm_i->fcnt--; - spin_unlock(&nm_i->free_nid_list_lock); + spin_lock(&nm_i->nid_list_lock); + list_for_each_entry_safe(i, next_i, &nm_i->nid_list[FREE_NID_LIST], + list) { + __remove_nid_from_list(sbi, i, FREE_NID_LIST, false); + spin_unlock(&nm_i->nid_list_lock); kmem_cache_free(free_nid_slab, i); - spin_lock(&nm_i->free_nid_list_lock); + spin_lock(&nm_i->nid_list_lock); } - f2fs_bug_on(sbi, nm_i->fcnt); - spin_unlock(&nm_i->free_nid_list_lock); + f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID_LIST]); + f2fs_bug_on(sbi, nm_i->nid_cnt[ALLOC_NID_LIST]); + f2fs_bug_on(sbi, !list_empty(&nm_i->nid_list[ALLOC_NID_LIST])); + spin_unlock(&nm_i->nid_list_lock); /* destroy nat cache */ down_write(&nm_i->nat_tree_lock); @@ -2367,7 +2780,15 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) } up_write(&nm_i->nat_tree_lock); + kvfree(nm_i->nat_block_bitmap); + kvfree(nm_i->free_nid_bitmap); + kvfree(nm_i->free_nid_count); + kfree(nm_i->nat_bitmap); + kfree(nm_i->nat_bits); +#ifdef CONFIG_F2FS_CHECK_FS + kfree(nm_i->nat_bitmap_mir); +#endif sbi->nm_info = NULL; kfree(nm_i); } diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 868bec65e51c..bb53e9955ff2 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -9,10 +9,10 @@ * published by the Free Software Foundation. */ /* start node id of a node block dedicated to the given node id */ -#define START_NID(nid) ((nid / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK) +#define START_NID(nid) (((nid) / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK) /* node block offset on the NAT area dedicated to the given start node id */ -#define NAT_BLOCK_OFFSET(start_nid) (start_nid / NAT_ENTRY_PER_BLOCK) +#define NAT_BLOCK_OFFSET(start_nid) ((start_nid) / NAT_ENTRY_PER_BLOCK) /* # of pages to perform synchronous readahead before building free nids */ #define FREE_NID_PAGES 8 @@ -62,16 +62,16 @@ struct nat_entry { struct node_info ni; /* in-memory node information */ }; -#define nat_get_nid(nat) (nat->ni.nid) -#define nat_set_nid(nat, n) (nat->ni.nid = n) -#define nat_get_blkaddr(nat) (nat->ni.blk_addr) -#define nat_set_blkaddr(nat, b) (nat->ni.blk_addr = b) -#define nat_get_ino(nat) (nat->ni.ino) -#define nat_set_ino(nat, i) (nat->ni.ino = i) -#define nat_get_version(nat) (nat->ni.version) -#define nat_set_version(nat, v) (nat->ni.version = v) +#define nat_get_nid(nat) ((nat)->ni.nid) +#define nat_set_nid(nat, n) ((nat)->ni.nid = (n)) +#define nat_get_blkaddr(nat) ((nat)->ni.blk_addr) +#define nat_set_blkaddr(nat, b) ((nat)->ni.blk_addr = (b)) +#define nat_get_ino(nat) ((nat)->ni.ino) +#define nat_set_ino(nat, i) ((nat)->ni.ino = (i)) +#define nat_get_version(nat) ((nat)->ni.version) +#define nat_set_version(nat, v) ((nat)->ni.version = (v)) -#define inc_node_version(version) (++version) +#define inc_node_version(version) (++(version)) static inline void copy_node_info(struct node_info *dst, struct node_info *src) @@ -169,14 +169,15 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *fnid; - spin_lock(&nm_i->free_nid_list_lock); - if (nm_i->fcnt <= 0) { - spin_unlock(&nm_i->free_nid_list_lock); + spin_lock(&nm_i->nid_list_lock); + if (nm_i->nid_cnt[FREE_NID_LIST] <= 0) { + spin_unlock(&nm_i->nid_list_lock); return; } - fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list); + fnid = list_first_entry(&nm_i->nid_list[FREE_NID_LIST], + struct free_nid, list); *nid = fnid->nid; - spin_unlock(&nm_i->free_nid_list_lock); + spin_unlock(&nm_i->nid_list_lock); } /* @@ -185,6 +186,12 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) static inline void get_nat_bitmap(struct f2fs_sb_info *sbi, void *addr) { struct f2fs_nm_info *nm_i = NM_I(sbi); + +#ifdef CONFIG_F2FS_CHECK_FS + if (memcmp(nm_i->nat_bitmap, nm_i->nat_bitmap_mir, + nm_i->bitmap_size)) + f2fs_bug_on(sbi, 1); +#endif memcpy(addr, nm_i->nat_bitmap, nm_i->bitmap_size); } @@ -193,13 +200,16 @@ static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start) struct f2fs_nm_info *nm_i = NM_I(sbi); pgoff_t block_off; pgoff_t block_addr; - int seg_off; + /* + * block_off = segment_off * 512 + off_in_segment + * OLD = (segment_off * 512) * 2 + off_in_segment + * NEW = 2 * (segment_off * 512 + off_in_segment) - off_in_segment + */ block_off = NAT_BLOCK_OFFSET(start); - seg_off = block_off >> sbi->log_blocks_per_seg; block_addr = (pgoff_t)(nm_i->nat_blkaddr + - (seg_off << sbi->log_blocks_per_seg << 1) + + (block_off << 1) - (block_off & (sbi->blocks_per_seg - 1))); if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) @@ -214,11 +224,7 @@ static inline pgoff_t next_nat_addr(struct f2fs_sb_info *sbi, struct f2fs_nm_info *nm_i = NM_I(sbi); block_addr -= nm_i->nat_blkaddr; - if ((block_addr >> sbi->log_blocks_per_seg) % 2) - block_addr -= sbi->blocks_per_seg; - else - block_addr += sbi->blocks_per_seg; - + block_addr ^= 1 << sbi->log_blocks_per_seg; return block_addr + nm_i->nat_blkaddr; } @@ -227,6 +233,9 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) unsigned int block_off = NAT_BLOCK_OFFSET(start_nid); f2fs_change_bit(block_off, nm_i->nat_bitmap); +#ifdef CONFIG_F2FS_CHECK_FS + f2fs_change_bit(block_off, nm_i->nat_bitmap_mir); +#endif } static inline nid_t ino_of_node(struct page *node_page) @@ -290,14 +299,11 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); struct f2fs_node *rn = F2FS_NODE(page); - size_t crc_offset = le32_to_cpu(ckpt->checksum_offset); - __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver); + __u64 cp_ver = cur_cp_version(ckpt); + + if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) + cp_ver |= (cur_cp_crc(ckpt) << 32); - if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) { - __u64 crc = le32_to_cpu(*((__le32 *) - ((unsigned char *)ckpt + crc_offset))); - cp_ver |= (crc << 32); - } rn->footer.cp_ver = cpu_to_le64(cp_ver); rn->footer.next_blkaddr = cpu_to_le32(blkaddr); } @@ -305,15 +311,12 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) static inline bool is_recoverable_dnode(struct page *page) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); - size_t crc_offset = le32_to_cpu(ckpt->checksum_offset); __u64 cp_ver = cur_cp_version(ckpt); - if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) { - __u64 crc = le32_to_cpu(*((__le32 *) - ((unsigned char *)ckpt + crc_offset))); - cp_ver |= (crc << 32); - } - return cpu_to_le64(cp_ver) == cpver_of_node(page); + if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) + cp_ver |= (cur_cp_crc(ckpt) << 32); + + return cp_ver == cpver_of_node(page); } /* @@ -342,7 +345,7 @@ static inline bool IS_DNODE(struct page *node_page) unsigned int ofs = ofs_of_node(node_page); if (f2fs_has_xattr_block(ofs)) - return false; + return true; if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK || ofs == 5 + 2 * NIDS_PER_BLOCK) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 98c1a63a4614..9626758bc762 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -69,20 +69,34 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, } static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi, - struct list_head *head, nid_t ino) + struct list_head *head, nid_t ino, bool quota_inode) { struct inode *inode; struct fsync_inode_entry *entry; + int err; inode = f2fs_iget_retry(sbi->sb, ino); if (IS_ERR(inode)) return ERR_CAST(inode); + err = dquot_initialize(inode); + if (err) + goto err_out; + + if (quota_inode) { + err = dquot_alloc_inode(inode); + if (err) + goto err_out; + } + entry = f2fs_kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO); entry->inode = inode; list_add_tail(&entry->list, head); return entry; +err_out: + iput(inode); + return ERR_PTR(err); } static void del_fsync_inode(struct fsync_inode_entry *entry) @@ -107,7 +121,8 @@ static int recover_dentry(struct inode *inode, struct page *ipage, entry = get_fsync_inode(dir_list, pino); if (!entry) { - entry = add_fsync_inode(F2FS_I_SB(inode), dir_list, pino); + entry = add_fsync_inode(F2FS_I_SB(inode), dir_list, + pino, false); if (IS_ERR(entry)) { dir = ERR_CAST(entry); err = PTR_ERR(entry); @@ -140,6 +155,13 @@ retry: err = -EEXIST; goto out_unmap_put; } + + err = dquot_initialize(einode); + if (err) { + iput(einode); + goto out_unmap_put; + } + err = acquire_orphan_inode(F2FS_I_SB(inode)); if (err) { iput(einode); @@ -180,13 +202,15 @@ static void recover_inode(struct inode *inode, struct page *page) inode->i_mode = le16_to_cpu(raw->i_mode); f2fs_i_size_write(inode, le64_to_cpu(raw->i_size)); - inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime); + inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime); inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime); inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime); - inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); + inode->i_atime.tv_nsec = le32_to_cpu(raw->i_atime_nsec); inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec); inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); + F2FS_I(inode)->i_advise = raw->i_advise; + if (file_enc_name(inode)) name = ""; else @@ -196,33 +220,8 @@ static void recover_inode(struct inode *inode, struct page *page) ino_of_node(page), name); } -static bool is_same_inode(struct inode *inode, struct page *ipage) -{ - struct f2fs_inode *ri = F2FS_INODE(ipage); - struct timespec disk; - - if (!IS_INODE(ipage)) - return true; - - disk.tv_sec = le64_to_cpu(ri->i_ctime); - disk.tv_nsec = le32_to_cpu(ri->i_ctime_nsec); - if (timespec_compare(&inode->i_ctime, &disk) > 0) - return false; - - disk.tv_sec = le64_to_cpu(ri->i_atime); - disk.tv_nsec = le32_to_cpu(ri->i_atime_nsec); - if (timespec_compare(&inode->i_atime, &disk) > 0) - return false; - - disk.tv_sec = le64_to_cpu(ri->i_mtime); - disk.tv_nsec = le32_to_cpu(ri->i_mtime_nsec); - if (timespec_compare(&inode->i_mtime, &disk) > 0) - return false; - - return true; -} - -static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) +static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, + bool check_only) { struct curseg_info *curseg; struct page *page = NULL; @@ -248,21 +247,23 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) goto next; entry = get_fsync_inode(head, ino_of_node(page)); - if (entry) { - if (!is_same_inode(entry->inode, page)) - goto next; - } else { - if (IS_INODE(page) && is_dent_dnode(page)) { + if (!entry) { + bool quota_inode = false; + + if (!check_only && + IS_INODE(page) && is_dent_dnode(page)) { err = recover_inode_page(sbi, page); if (err) break; + quota_inode = true; } /* * CP | dnode(F) | inode(DF) * For this case, we should not give up now. */ - entry = add_fsync_inode(sbi, head, ino_of_node(page)); + entry = add_fsync_inode(sbi, head, ino_of_node(page), + quota_inode); if (IS_ERR(entry)) { err = PTR_ERR(entry); if (err == -ENOENT) { @@ -353,10 +354,18 @@ got_it: f2fs_put_page(node_page, 1); if (ino != dn->inode->i_ino) { + int ret; + /* Deallocate previous index in the node page */ inode = f2fs_iget_retry(sbi->sb, ino); if (IS_ERR(inode)) return PTR_ERR(inode); + + ret = dquot_initialize(inode); + if (ret) { + iput(inode); + return ret; + } } else { inode = dn->inode; } @@ -386,7 +395,8 @@ out: return 0; truncate_out: - if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr) + if (datablock_addr(tdn.inode, tdn.node_page, + tdn.ofs_in_node) == blkaddr) truncate_data_blocks_range(&tdn, 1); if (dn->inode->i_ino == nid && !dn->inode_page_locked) unlock_page(dn->inode_page); @@ -405,11 +415,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, if (IS_INODE(page)) { recover_inline_xattr(inode, page); } else if (f2fs_has_xattr_block(ofs_of_node(page))) { - /* - * Deprecated; xattr blocks should be found from cold log. - * But, we should remain this for backward compatibility. - */ - recover_xattr_data(inode, page, blkaddr); + err = recover_xattr_data(inode, page, blkaddr); + if (!err) + recovered++; goto out; } @@ -441,8 +449,8 @@ retry_dn: for (; start < end; start++, dn.ofs_in_node++) { block_t src, dest; - src = datablock_addr(dn.node_page, dn.ofs_in_node); - dest = datablock_addr(page, dn.ofs_in_node); + src = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); + dest = datablock_addr(dn.inode, page, dn.ofs_in_node); /* skip recovering if dest is the same as src */ if (src == dest) @@ -454,8 +462,10 @@ retry_dn: continue; } - if ((start + 1) << PAGE_SHIFT > i_size_read(inode)) - f2fs_i_size_write(inode, (start + 1) << PAGE_SHIFT); + if (!file_keep_isize(inode) && + (i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT))) + f2fs_i_size_write(inode, + (loff_t)(start + 1) << PAGE_SHIFT); /* * dest is reserved block, invalidate src block @@ -507,8 +517,10 @@ err: f2fs_put_dnode(&dn); out: f2fs_msg(sbi->sb, KERN_NOTICE, - "recover_data: ino = %lx, recovered = %d blocks, err = %d", - inode->i_ino, recovered, err); + "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d", + inode->i_ino, + file_keep_isize(inode) ? "keep" : "recover", + recovered, err); return err; } @@ -576,18 +588,31 @@ next: int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) { - struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); struct list_head inode_list; struct list_head dir_list; - block_t blkaddr; int err; int ret = 0; + unsigned long s_flags = sbi->sb->s_flags; bool need_writecp = false; + if (s_flags & MS_RDONLY) { + f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); + sbi->sb->s_flags &= ~MS_RDONLY; + } + +#ifdef CONFIG_QUOTA + /* Needed for iput() to work correctly and not trash data */ + sbi->sb->s_flags |= MS_ACTIVE; + /* Turn on quotas so that they are updated correctly */ + f2fs_enable_quota_files(sbi); +#endif + fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", sizeof(struct fsync_inode_entry)); - if (!fsync_entry_slab) - return -ENOMEM; + if (!fsync_entry_slab) { + err = -ENOMEM; + goto out; + } INIT_LIST_HEAD(&inode_list); INIT_LIST_HEAD(&dir_list); @@ -595,16 +620,14 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) /* prevent checkpoint */ mutex_lock(&sbi->cp_mutex); - blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); - /* step #1: find fsynced inode numbers */ - err = find_fsync_dnodes(sbi, &inode_list); + err = find_fsync_dnodes(sbi, &inode_list, check_only); if (err || list_empty(&inode_list)) - goto out; + goto skip; if (check_only) { ret = 1; - goto out; + goto skip; } need_writecp = true; @@ -613,7 +636,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) err = recover_data(sbi, &inode_list, &dir_list); if (!err) f2fs_bug_on(sbi, !list_empty(&inode_list)); -out: +skip: destroy_fsync_dnodes(&inode_list); /* truncate meta pages to be used by the recovery */ @@ -639,5 +662,12 @@ out: } kmem_cache_destroy(fsync_entry_slab); +out: +#ifdef CONFIG_QUOTA + /* Turn quotas off */ + f2fs_quota_off_umount(sbi->sb); +#endif + sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ + return ret ? ret: err; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 74a2b444406d..951ebd59643b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -16,17 +16,20 @@ #include #include #include +#include +#include #include "f2fs.h" #include "segment.h" #include "node.h" +#include "gc.h" #include "trace.h" #include #define __reverse_ffz(x) __reverse_ffs(~(x)) static struct kmem_cache *discard_entry_slab; -static struct kmem_cache *bio_entry_slab; +static struct kmem_cache *discard_cmd_slab; static struct kmem_cache *sit_entry_set_slab; static struct kmem_cache *inmem_entry_slab; @@ -166,6 +169,21 @@ found: return result - size + __reverse_ffz(tmp); } +bool need_SSR(struct f2fs_sb_info *sbi) +{ + int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); + int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); + int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA); + + if (test_opt(sbi, LFS)) + return false; + if (sbi->gc_thread && sbi->gc_thread->gc_urgent) + return true; + + return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs + + 2 * reserved_sections(sbi)); +} + void register_inmem_page(struct inode *inode, struct page *page) { struct f2fs_inode_info *fi = F2FS_I(inode); @@ -212,9 +230,15 @@ static int __revoke_inmem_pages(struct inode *inode, struct node_info ni; trace_f2fs_commit_inmem_page(page, INMEM_REVOKE); - +retry: set_new_dnode(&dn, inode, NULL, NULL, 0); - if (get_dnode_of_data(&dn, page->index, LOOKUP_NODE)) { + err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE); + if (err) { + if (err == -ENOMEM) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + cond_resched(); + goto retry; + } err = -EAGAIN; goto next; } @@ -242,11 +266,43 @@ void drop_inmem_pages(struct inode *inode) { struct f2fs_inode_info *fi = F2FS_I(inode); - clear_inode_flag(inode, FI_ATOMIC_FILE); - mutex_lock(&fi->inmem_lock); __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); mutex_unlock(&fi->inmem_lock); + + clear_inode_flag(inode, FI_ATOMIC_FILE); + clear_inode_flag(inode, FI_HOT_DATA); + stat_dec_atomic_write(inode); +} + +void drop_inmem_page(struct inode *inode, struct page *page) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct list_head *head = &fi->inmem_pages; + struct inmem_pages *cur = NULL; + + f2fs_bug_on(sbi, !IS_ATOMIC_WRITTEN_PAGE(page)); + + mutex_lock(&fi->inmem_lock); + list_for_each_entry(cur, head, list) { + if (cur->page == page) + break; + } + + f2fs_bug_on(sbi, !cur || cur->page != page); + list_del(&cur->list); + mutex_unlock(&fi->inmem_lock); + + dec_page_count(sbi, F2FS_INMEM_PAGES); + kmem_cache_free(inmem_entry_slab, cur); + + ClearPageUptodate(page); + set_page_private(page, 0); + ClearPagePrivate(page); + f2fs_put_page(page, 0); + + trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE); } static int __commit_inmem_pages(struct inode *inode, @@ -259,10 +315,10 @@ static int __commit_inmem_pages(struct inode *inode, .sbi = sbi, .type = DATA, .op = REQ_OP_WRITE, - .op_flags = WRITE_SYNC | REQ_PRIO, - .encrypted_page = NULL, + .op_flags = REQ_SYNC | REQ_PRIO, + .io_type = FS_DATA_IO, }; - bool submit_bio = false; + pgoff_t last_idx = ULONG_MAX; int err = 0; list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { @@ -274,28 +330,35 @@ static int __commit_inmem_pages(struct inode *inode, set_page_dirty(page); f2fs_wait_on_page_writeback(page, DATA, true); - if (clear_page_dirty_for_io(page)) + if (clear_page_dirty_for_io(page)) { inode_dec_dirty_pages(inode); - + remove_dirty_inode(inode); + } +retry: fio.page = page; + fio.old_blkaddr = NULL_ADDR; + fio.encrypted_page = NULL; + fio.need_lock = LOCK_DONE; err = do_write_data_page(&fio); if (err) { + if (err == -ENOMEM) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + cond_resched(); + goto retry; + } unlock_page(page); break; } - /* record old blkaddr for revoking */ cur->old_addr = fio.old_blkaddr; - - clear_cold_data(page); - submit_bio = true; + last_idx = page->index; } unlock_page(page); list_move_tail(&cur->list, revoke_list); } - if (submit_bio) - f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0, DATA, WRITE); + if (last_idx != ULONG_MAX) + f2fs_submit_merged_write_cond(sbi, inode, 0, last_idx, DATA); if (!err) __revoke_inmem_pages(inode, revoke_list, false, false); @@ -314,6 +377,8 @@ int commit_inmem_pages(struct inode *inode) f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); + set_inode_flag(inode, FI_ATOMIC_COMMIT); + mutex_lock(&fi->inmem_lock); err = __commit_inmem_pages(inode, &revoke_list); if (err) { @@ -335,6 +400,8 @@ int commit_inmem_pages(struct inode *inode) } mutex_unlock(&fi->inmem_lock); + clear_inode_flag(inode, FI_ATOMIC_COMMIT); + f2fs_unlock_op(sbi); return err; } @@ -346,15 +413,14 @@ int commit_inmem_pages(struct inode *inode) void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) { #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(sbi, FAULT_CHECKPOINT)) + if (time_to_inject(sbi, FAULT_CHECKPOINT)) { + f2fs_show_injection_info(FAULT_CHECKPOINT); f2fs_stop_checkpoint(sbi, false); + } #endif - if (!need) - return; - /* balance_fs_bg is able to be pending */ - if (excess_cached_nats(sbi)) + if (need && excess_cached_nats(sbi)) f2fs_balance_fs_bg(sbi); /* @@ -363,7 +429,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) */ if (has_not_enough_free_secs(sbi, 0, 0)) { mutex_lock(&sbi->gc_mutex); - f2fs_gc(sbi, false); + f2fs_gc(sbi, false, false, NULL_SEGNO); } } @@ -380,14 +446,17 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) if (!available_free_memory(sbi, FREE_NIDS)) try_to_free_nids(sbi, MAX_FREE_NIDS); else - build_free_nids(sbi); + build_free_nids(sbi, false, false); + + if (!is_idle(sbi) && !excess_dirty_nats(sbi)) + return; /* checkpoint is the only way to shrink partial cached entries */ if (!available_free_memory(sbi, NAT_ENTRIES) || !available_free_memory(sbi, INO_ENTRIES) || excess_prefree_segs(sbi) || excess_dirty_nats(sbi) || - (is_idle(sbi) && f2fs_time_over(sbi, CP_TIME))) { + f2fs_time_over(sbi, CP_TIME)) { if (test_opt(sbi, DATA_FLUSH)) { struct blk_plug plug; @@ -400,38 +469,69 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) } } +static int __submit_flush_wait(struct f2fs_sb_info *sbi, + struct block_device *bdev) +{ + struct bio *bio = f2fs_bio_alloc(0); + int ret; + + bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH; + bio->bi_bdev = bdev; + ret = submit_bio_wait(bio); + bio_put(bio); + + trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER), + test_opt(sbi, FLUSH_MERGE), ret); + return ret; +} + +static int submit_flush_wait(struct f2fs_sb_info *sbi) +{ + int ret = __submit_flush_wait(sbi, sbi->sb->s_bdev); + int i; + + if (!sbi->s_ndevs || ret) + return ret; + + for (i = 1; i < sbi->s_ndevs; i++) { + ret = __submit_flush_wait(sbi, FDEV(i).bdev); + if (ret) + break; + } + return ret; +} + static int issue_flush_thread(void *data) { struct f2fs_sb_info *sbi = data; - struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; + struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info; wait_queue_head_t *q = &fcc->flush_wait_queue; repeat: if (kthread_should_stop()) return 0; + sb_start_intwrite(sbi->sb); + if (!llist_empty(&fcc->issue_list)) { - struct bio *bio; struct flush_cmd *cmd, *next; int ret; - bio = f2fs_bio_alloc(0); - fcc->dispatch_list = llist_del_all(&fcc->issue_list); fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list); - bio->bi_bdev = sbi->sb->s_bdev; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); - ret = submit_bio_wait(bio); + ret = submit_flush_wait(sbi); + atomic_inc(&fcc->issued_flush); llist_for_each_entry_safe(cmd, next, fcc->dispatch_list, llnode) { cmd->ret = ret; complete(&cmd->wait); } - bio_put(bio); fcc->dispatch_list = NULL; } + sb_end_intwrite(sbi->sb); + wait_event_interruptible(*q, kthread_should_stop() || !llist_empty(&fcc->issue_list)); goto repeat; @@ -439,38 +539,63 @@ repeat: int f2fs_issue_flush(struct f2fs_sb_info *sbi) { - struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; + struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info; struct flush_cmd cmd; - - trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER), - test_opt(sbi, FLUSH_MERGE)); + int ret; if (test_opt(sbi, NOBARRIER)) return 0; - if (!test_opt(sbi, FLUSH_MERGE) || !atomic_read(&fcc->submit_flush)) { - struct bio *bio = f2fs_bio_alloc(0); - int ret; + if (!test_opt(sbi, FLUSH_MERGE)) { + ret = submit_flush_wait(sbi); + atomic_inc(&fcc->issued_flush); + return ret; + } - atomic_inc(&fcc->submit_flush); - bio->bi_bdev = sbi->sb->s_bdev; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); - ret = submit_bio_wait(bio); - atomic_dec(&fcc->submit_flush); - bio_put(bio); + if (atomic_inc_return(&fcc->issing_flush) == 1) { + ret = submit_flush_wait(sbi); + atomic_dec(&fcc->issing_flush); + + atomic_inc(&fcc->issued_flush); return ret; } init_completion(&cmd.wait); - atomic_inc(&fcc->submit_flush); llist_add(&cmd.llnode, &fcc->issue_list); - if (!fcc->dispatch_list) + /* update issue_list before we wake up issue_flush thread */ + smp_mb(); + + if (waitqueue_active(&fcc->flush_wait_queue)) wake_up(&fcc->flush_wait_queue); - wait_for_completion(&cmd.wait); - atomic_dec(&fcc->submit_flush); + if (fcc->f2fs_issue_flush) { + wait_for_completion(&cmd.wait); + atomic_dec(&fcc->issing_flush); + } else { + struct llist_node *list; + + list = llist_del_all(&fcc->issue_list); + if (!list) { + wait_for_completion(&cmd.wait); + atomic_dec(&fcc->issing_flush); + } else { + struct flush_cmd *tmp, *next; + + ret = submit_flush_wait(sbi); + + llist_for_each_entry_safe(tmp, next, list, llnode) { + if (tmp == &cmd) { + cmd.ret = ret; + atomic_dec(&fcc->issing_flush); + continue; + } + tmp->ret = ret; + complete(&tmp->wait); + } + } + } return cmd.ret; } @@ -481,33 +606,51 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) struct flush_cmd_control *fcc; int err = 0; + if (SM_I(sbi)->fcc_info) { + fcc = SM_I(sbi)->fcc_info; + if (fcc->f2fs_issue_flush) + return err; + goto init_thread; + } + fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); if (!fcc) return -ENOMEM; - atomic_set(&fcc->submit_flush, 0); + atomic_set(&fcc->issued_flush, 0); + atomic_set(&fcc->issing_flush, 0); init_waitqueue_head(&fcc->flush_wait_queue); init_llist_head(&fcc->issue_list); - SM_I(sbi)->cmd_control_info = fcc; + SM_I(sbi)->fcc_info = fcc; + if (!test_opt(sbi, FLUSH_MERGE)) + return err; + +init_thread: fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(fcc->f2fs_issue_flush)) { err = PTR_ERR(fcc->f2fs_issue_flush); kfree(fcc); - SM_I(sbi)->cmd_control_info = NULL; + SM_I(sbi)->fcc_info = NULL; return err; } return err; } -void destroy_flush_cmd_control(struct f2fs_sb_info *sbi) +void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) { - struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; + struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info; - if (fcc && fcc->f2fs_issue_flush) - kthread_stop(fcc->f2fs_issue_flush); - kfree(fcc); - SM_I(sbi)->cmd_control_info = NULL; + if (fcc && fcc->f2fs_issue_flush) { + struct task_struct *flush_thread = fcc->f2fs_issue_flush; + + fcc->f2fs_issue_flush = NULL; + kthread_stop(flush_thread); + } + if (free) { + kfree(fcc); + SM_I(sbi)->fcc_info = NULL; + } } static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, @@ -550,8 +693,8 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) dirty_i->nr_dirty[t]--; - if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0) - clear_bit(GET_SECNO(sbi, segno), + if (get_valid_blocks(sbi, segno, true) == 0) + clear_bit(GET_SEC_FROM_SEG(sbi, segno), dirty_i->victim_secmap); } } @@ -571,7 +714,7 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_lock(&dirty_i->seglist_lock); - valid_blocks = get_valid_blocks(sbi, segno, 0); + valid_blocks = get_valid_blocks(sbi, segno, false); if (valid_blocks == 0) { __locate_dirty_segment(sbi, segno, PRE); @@ -586,120 +729,647 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_unlock(&dirty_i->seglist_lock); } -static struct bio_entry *__add_bio_entry(struct f2fs_sb_info *sbi, - struct bio *bio) +static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t lstart, + block_t start, block_t len) { - struct list_head *wait_list = &(SM_I(sbi)->wait_list); - struct bio_entry *be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS); + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *pend_list; + struct discard_cmd *dc; - INIT_LIST_HEAD(&be->list); - be->bio = bio; - init_completion(&be->event); - list_add_tail(&be->list, wait_list); + f2fs_bug_on(sbi, !len); - return be; + pend_list = &dcc->pend_list[plist_idx(len)]; + + dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS); + INIT_LIST_HEAD(&dc->list); + dc->bdev = bdev; + dc->lstart = lstart; + dc->start = start; + dc->len = len; + dc->ref = 0; + dc->state = D_PREP; + dc->error = 0; + init_completion(&dc->wait); + list_add_tail(&dc->list, pend_list); + atomic_inc(&dcc->discard_cmd_cnt); + dcc->undiscard_blks += len; + + return dc; } -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi) +static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t lstart, + block_t start, block_t len, + struct rb_node *parent, struct rb_node **p) { - struct list_head *wait_list = &(SM_I(sbi)->wait_list); - struct bio_entry *be, *tmp; + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct discard_cmd *dc; - list_for_each_entry_safe(be, tmp, wait_list, list) { - struct bio *bio = be->bio; - int err; + dc = __create_discard_cmd(sbi, bdev, lstart, start, len); - wait_for_completion_io(&be->event); - err = be->error; - if (err == -EOPNOTSUPP) - err = 0; + rb_link_node(&dc->rb_node, parent, p); + rb_insert_color(&dc->rb_node, &dcc->root); - if (err) - f2fs_msg(sbi->sb, KERN_INFO, - "Issue discard failed, ret: %d", err); + return dc; +} - bio_put(bio); - list_del(&be->list); - kmem_cache_free(bio_entry_slab, be); +static void __detach_discard_cmd(struct discard_cmd_control *dcc, + struct discard_cmd *dc) +{ + if (dc->state == D_DONE) + atomic_dec(&dcc->issing_discard); + + list_del(&dc->list); + rb_erase(&dc->rb_node, &dcc->root); + dcc->undiscard_blks -= dc->len; + + kmem_cache_free(discard_cmd_slab, dc); + + atomic_dec(&dcc->discard_cmd_cnt); +} + +static void __remove_discard_cmd(struct f2fs_sb_info *sbi, + struct discard_cmd *dc) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + + f2fs_bug_on(sbi, dc->ref); + + if (dc->error == -EOPNOTSUPP) + dc->error = 0; + + if (dc->error) + f2fs_msg(sbi->sb, KERN_INFO, + "Issue discard(%u, %u, %u) failed, ret: %d", + dc->lstart, dc->start, dc->len, dc->error); + __detach_discard_cmd(dcc, dc); +} + +static void f2fs_submit_discard_endio(struct bio *bio) +{ + struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; + + dc->error = bio->bi_error; + dc->state = D_DONE; + complete_all(&dc->wait); + bio_put(bio); +} + +void __check_sit_bitmap(struct f2fs_sb_info *sbi, + block_t start, block_t end) +{ +#ifdef CONFIG_F2FS_CHECK_FS + struct seg_entry *sentry; + unsigned int segno; + block_t blk = start; + unsigned long offset, size, max_blocks = sbi->blocks_per_seg; + unsigned long *map; + + while (blk < end) { + segno = GET_SEGNO(sbi, blk); + sentry = get_seg_entry(sbi, segno); + offset = GET_BLKOFF_FROM_SEG0(sbi, blk); + + if (end < START_BLOCK(sbi, segno + 1)) + size = GET_BLKOFF_FROM_SEG0(sbi, end); + else + size = max_blocks; + map = (unsigned long *)(sentry->cur_valid_map); + offset = __find_rev_next_bit(map, size, offset); + f2fs_bug_on(sbi, offset != size); + blk = START_BLOCK(sbi, segno + 1); } -} - -static void f2fs_submit_bio_wait_endio(struct bio *bio) -{ - struct bio_entry *be = (struct bio_entry *)bio->bi_private; - - be->error = bio->bi_error; - complete(&be->event); +#endif } /* this function is copied from blkdev_issue_discard from block/blk-lib.c */ -int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, unsigned long flags) +static void __submit_discard_cmd(struct f2fs_sb_info *sbi, + struct discard_cmd *dc) { - struct block_device *bdev = sbi->sb->s_bdev; + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct bio *bio = NULL; - int err; - err = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags, - &bio); - if (!err && bio) { - struct bio_entry *be = __add_bio_entry(sbi, bio); + if (dc->state != D_PREP) + return; - bio->bi_private = be; - bio->bi_end_io = f2fs_submit_bio_wait_endio; - bio->bi_opf |= REQ_SYNC; - submit_bio(bio); + trace_f2fs_issue_discard(dc->bdev, dc->start, dc->len); + + dc->error = __blkdev_issue_discard(dc->bdev, + SECTOR_FROM_BLOCK(dc->start), + SECTOR_FROM_BLOCK(dc->len), + GFP_NOFS, 0, &bio); + if (!dc->error) { + /* should keep before submission to avoid D_DONE right away */ + dc->state = D_SUBMIT; + atomic_inc(&dcc->issued_discard); + atomic_inc(&dcc->issing_discard); + if (bio) { + bio->bi_private = dc; + bio->bi_end_io = f2fs_submit_discard_endio; + bio->bi_opf |= REQ_SYNC; + submit_bio(bio); + list_move_tail(&dc->list, &dcc->wait_list); + __check_sit_bitmap(sbi, dc->start, dc->start + dc->len); + + f2fs_update_iostat(sbi, FS_DISCARD, 1); + } + } else { + __remove_discard_cmd(sbi, dc); + } +} + +static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t lstart, + block_t start, block_t len, + struct rb_node **insert_p, + struct rb_node *insert_parent) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct rb_node **p = &dcc->root.rb_node; + struct rb_node *parent = NULL; + struct discard_cmd *dc = NULL; + + if (insert_p && insert_parent) { + parent = insert_parent; + p = insert_p; + goto do_insert; } - return err; + p = __lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart); +do_insert: + dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p); + if (!dc) + return NULL; + + return dc; +} + +static void __relocate_discard_cmd(struct discard_cmd_control *dcc, + struct discard_cmd *dc) +{ + list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]); +} + +static void __punch_discard_cmd(struct f2fs_sb_info *sbi, + struct discard_cmd *dc, block_t blkaddr) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct discard_info di = dc->di; + bool modified = false; + + if (dc->state == D_DONE || dc->len == 1) { + __remove_discard_cmd(sbi, dc); + return; + } + + dcc->undiscard_blks -= di.len; + + if (blkaddr > di.lstart) { + dc->len = blkaddr - dc->lstart; + dcc->undiscard_blks += dc->len; + __relocate_discard_cmd(dcc, dc); + modified = true; + } + + if (blkaddr < di.lstart + di.len - 1) { + if (modified) { + __insert_discard_tree(sbi, dc->bdev, blkaddr + 1, + di.start + blkaddr + 1 - di.lstart, + di.lstart + di.len - 1 - blkaddr, + NULL, NULL); + } else { + dc->lstart++; + dc->len--; + dc->start++; + dcc->undiscard_blks += dc->len; + __relocate_discard_cmd(dcc, dc); + } + } +} + +static void __update_discard_tree_range(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t lstart, + block_t start, block_t len) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct discard_cmd *prev_dc = NULL, *next_dc = NULL; + struct discard_cmd *dc; + struct discard_info di = {0}; + struct rb_node **insert_p = NULL, *insert_parent = NULL; + block_t end = lstart + len; + + mutex_lock(&dcc->cmd_lock); + + dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root, + NULL, lstart, + (struct rb_entry **)&prev_dc, + (struct rb_entry **)&next_dc, + &insert_p, &insert_parent, true); + if (dc) + prev_dc = dc; + + if (!prev_dc) { + di.lstart = lstart; + di.len = next_dc ? next_dc->lstart - lstart : len; + di.len = min(di.len, len); + di.start = start; + } + + while (1) { + struct rb_node *node; + bool merged = false; + struct discard_cmd *tdc = NULL; + + if (prev_dc) { + di.lstart = prev_dc->lstart + prev_dc->len; + if (di.lstart < lstart) + di.lstart = lstart; + if (di.lstart >= end) + break; + + if (!next_dc || next_dc->lstart > end) + di.len = end - di.lstart; + else + di.len = next_dc->lstart - di.lstart; + di.start = start + di.lstart - lstart; + } + + if (!di.len) + goto next; + + if (prev_dc && prev_dc->state == D_PREP && + prev_dc->bdev == bdev && + __is_discard_back_mergeable(&di, &prev_dc->di)) { + prev_dc->di.len += di.len; + dcc->undiscard_blks += di.len; + __relocate_discard_cmd(dcc, prev_dc); + di = prev_dc->di; + tdc = prev_dc; + merged = true; + } + + if (next_dc && next_dc->state == D_PREP && + next_dc->bdev == bdev && + __is_discard_front_mergeable(&di, &next_dc->di)) { + next_dc->di.lstart = di.lstart; + next_dc->di.len += di.len; + next_dc->di.start = di.start; + dcc->undiscard_blks += di.len; + __relocate_discard_cmd(dcc, next_dc); + if (tdc) + __remove_discard_cmd(sbi, tdc); + merged = true; + } + + if (!merged) { + __insert_discard_tree(sbi, bdev, di.lstart, di.start, + di.len, NULL, NULL); + } + next: + prev_dc = next_dc; + if (!prev_dc) + break; + + node = rb_next(&prev_dc->rb_node); + next_dc = rb_entry_safe(node, struct discard_cmd, rb_node); + } + + mutex_unlock(&dcc->cmd_lock); +} + +static int __queue_discard_cmd(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t blkstart, block_t blklen) +{ + block_t lblkstart = blkstart; + + trace_f2fs_queue_discard(bdev, blkstart, blklen); + + if (sbi->s_ndevs) { + int devi = f2fs_target_device_index(sbi, blkstart); + + blkstart -= FDEV(devi).start_blk; + } + __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen); + return 0; +} + +static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *pend_list; + struct discard_cmd *dc, *tmp; + struct blk_plug plug; + int iter = 0, issued = 0; + int i; + bool io_interrupted = false; + + mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, + !__check_rb_tree_consistence(sbi, &dcc->root)); + blk_start_plug(&plug); + for (i = MAX_PLIST_NUM - 1; + i >= 0 && plist_issue(dcc->pend_list_tag[i]); i--) { + pend_list = &dcc->pend_list[i]; + list_for_each_entry_safe(dc, tmp, pend_list, list) { + f2fs_bug_on(sbi, dc->state != D_PREP); + + /* Hurry up to finish fstrim */ + if (dcc->pend_list_tag[i] & P_TRIM) { + __submit_discard_cmd(sbi, dc); + issued++; + + if (fatal_signal_pending(current)) + break; + continue; + } + + if (!issue_cond) { + __submit_discard_cmd(sbi, dc); + issued++; + continue; + } + + if (is_idle(sbi)) { + __submit_discard_cmd(sbi, dc); + issued++; + } else { + io_interrupted = true; + } + + if (++iter >= DISCARD_ISSUE_RATE) + goto out; + } + if (list_empty(pend_list) && dcc->pend_list_tag[i] & P_TRIM) + dcc->pend_list_tag[i] &= (~P_TRIM); + } +out: + blk_finish_plug(&plug); + mutex_unlock(&dcc->cmd_lock); + + if (!issued && io_interrupted) + issued = -1; + + return issued; +} + +static void __drop_discard_cmd(struct f2fs_sb_info *sbi) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *pend_list; + struct discard_cmd *dc, *tmp; + int i; + + mutex_lock(&dcc->cmd_lock); + for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { + pend_list = &dcc->pend_list[i]; + list_for_each_entry_safe(dc, tmp, pend_list, list) { + f2fs_bug_on(sbi, dc->state != D_PREP); + __remove_discard_cmd(sbi, dc); + } + } + mutex_unlock(&dcc->cmd_lock); +} + +static void __wait_one_discard_bio(struct f2fs_sb_info *sbi, + struct discard_cmd *dc) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + + wait_for_completion_io(&dc->wait); + mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, dc->state != D_DONE); + dc->ref--; + if (!dc->ref) + __remove_discard_cmd(sbi, dc); + mutex_unlock(&dcc->cmd_lock); +} + +static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *wait_list = &(dcc->wait_list); + struct discard_cmd *dc, *tmp; + bool need_wait; + +next: + need_wait = false; + + mutex_lock(&dcc->cmd_lock); + list_for_each_entry_safe(dc, tmp, wait_list, list) { + if (!wait_cond || (dc->state == D_DONE && !dc->ref)) { + wait_for_completion_io(&dc->wait); + __remove_discard_cmd(sbi, dc); + } else { + dc->ref++; + need_wait = true; + break; + } + } + mutex_unlock(&dcc->cmd_lock); + + if (need_wait) { + __wait_one_discard_bio(sbi, dc); + goto next; + } +} + +/* This should be covered by global mutex, &sit_i->sentry_lock */ +void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct discard_cmd *dc; + bool need_wait = false; + + mutex_lock(&dcc->cmd_lock); + dc = (struct discard_cmd *)__lookup_rb_tree(&dcc->root, NULL, blkaddr); + if (dc) { + if (dc->state == D_PREP) { + __punch_discard_cmd(sbi, dc, blkaddr); + } else { + dc->ref++; + need_wait = true; + } + } + mutex_unlock(&dcc->cmd_lock); + + if (need_wait) + __wait_one_discard_bio(sbi, dc); +} + +void stop_discard_thread(struct f2fs_sb_info *sbi) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + + if (dcc && dcc->f2fs_issue_discard) { + struct task_struct *discard_thread = dcc->f2fs_issue_discard; + + dcc->f2fs_issue_discard = NULL; + kthread_stop(discard_thread); + } +} + +/* This comes from f2fs_put_super and f2fs_trim_fs */ +void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount) +{ + __issue_discard_cmd(sbi, false); + __drop_discard_cmd(sbi); + __wait_discard_cmd(sbi, !umount); +} + +static void mark_discard_range_all(struct f2fs_sb_info *sbi) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + int i; + + mutex_lock(&dcc->cmd_lock); + for (i = 0; i < MAX_PLIST_NUM; i++) + dcc->pend_list_tag[i] |= P_TRIM; + mutex_unlock(&dcc->cmd_lock); +} + +static int issue_discard_thread(void *data) +{ + struct f2fs_sb_info *sbi = data; + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + wait_queue_head_t *q = &dcc->discard_wait_queue; + unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; + int issued; + + set_freezable(); + + do { + wait_event_interruptible_timeout(*q, + kthread_should_stop() || freezing(current) || + dcc->discard_wake, + msecs_to_jiffies(wait_ms)); + if (try_to_freeze()) + continue; + if (kthread_should_stop()) + return 0; + + if (dcc->discard_wake) { + dcc->discard_wake = 0; + if (sbi->gc_thread && sbi->gc_thread->gc_urgent) + mark_discard_range_all(sbi); + } + + sb_start_intwrite(sbi->sb); + + issued = __issue_discard_cmd(sbi, true); + if (issued) { + __wait_discard_cmd(sbi, true); + wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; + } else { + wait_ms = DEF_MAX_DISCARD_ISSUE_TIME; + } + + sb_end_intwrite(sbi->sb); + + } while (!kthread_should_stop()); + return 0; +} + +#ifdef CONFIG_BLK_DEV_ZONED +static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t blkstart, block_t blklen) +{ + sector_t sector, nr_sects; + block_t lblkstart = blkstart; + int devi = 0; + + if (sbi->s_ndevs) { + devi = f2fs_target_device_index(sbi, blkstart); + blkstart -= FDEV(devi).start_blk; + } + + /* + * We need to know the type of the zone: for conventional zones, + * use regular discard if the drive supports it. For sequential + * zones, reset the zone write pointer. + */ + switch (get_blkz_type(sbi, bdev, blkstart)) { + + case BLK_ZONE_TYPE_CONVENTIONAL: + if (!blk_queue_discard(bdev_get_queue(bdev))) + return 0; + return __queue_discard_cmd(sbi, bdev, lblkstart, blklen); + case BLK_ZONE_TYPE_SEQWRITE_REQ: + case BLK_ZONE_TYPE_SEQWRITE_PREF: + sector = SECTOR_FROM_BLOCK(blkstart); + nr_sects = SECTOR_FROM_BLOCK(blklen); + + if (sector & (bdev_zone_sectors(bdev) - 1) || + nr_sects != bdev_zone_sectors(bdev)) { + f2fs_msg(sbi->sb, KERN_INFO, + "(%d) %s: Unaligned discard attempted (block %x + %x)", + devi, sbi->s_ndevs ? FDEV(devi).path: "", + blkstart, blklen); + return -EIO; + } + trace_f2fs_issue_reset_zone(bdev, blkstart); + return blkdev_reset_zones(bdev, sector, + nr_sects, GFP_NOFS); + default: + /* Unknown zone type: broken device ? */ + return -EIO; + } +} +#endif + +static int __issue_discard_async(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t blkstart, block_t blklen) +{ +#ifdef CONFIG_BLK_DEV_ZONED + if (f2fs_sb_mounted_blkzoned(sbi->sb) && + bdev_zoned_model(bdev) != BLK_ZONED_NONE) + return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen); +#endif + return __queue_discard_cmd(sbi, bdev, blkstart, blklen); } static int f2fs_issue_discard(struct f2fs_sb_info *sbi, block_t blkstart, block_t blklen) { - sector_t start = SECTOR_FROM_BLOCK(blkstart); - sector_t len = SECTOR_FROM_BLOCK(blklen); + sector_t start = blkstart, len = 0; + struct block_device *bdev; struct seg_entry *se; unsigned int offset; block_t i; + int err = 0; + + bdev = f2fs_target_device(sbi, blkstart, NULL); + + for (i = blkstart; i < blkstart + blklen; i++, len++) { + if (i != start) { + struct block_device *bdev2 = + f2fs_target_device(sbi, i, NULL); + + if (bdev2 != bdev) { + err = __issue_discard_async(sbi, bdev, + start, len); + if (err) + return err; + bdev = bdev2; + start = i; + len = 0; + } + } - for (i = blkstart; i < blkstart + blklen; i++) { se = get_seg_entry(sbi, GET_SEGNO(sbi, i)); offset = GET_BLKOFF_FROM_SEG0(sbi, i); if (!f2fs_test_and_set_bit(offset, se->discard_map)) sbi->discard_blks--; } - trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); - return __f2fs_issue_discard_async(sbi, start, len, GFP_NOFS, 0); + + if (len) + err = __issue_discard_async(sbi, bdev, start, len); + return err; } -static void __add_discard_entry(struct f2fs_sb_info *sbi, - struct cp_control *cpc, struct seg_entry *se, - unsigned int start, unsigned int end) -{ - struct list_head *head = &SM_I(sbi)->discard_list; - struct discard_entry *new, *last; - - if (!list_empty(head)) { - last = list_last_entry(head, struct discard_entry, list); - if (START_BLOCK(sbi, cpc->trim_start) + start == - last->blkaddr + last->len) { - last->len += end - start; - goto done; - } - } - - new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); - INIT_LIST_HEAD(&new->list); - new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start; - new->len = end - start; - list_add_tail(&new->list, head); -done: - SM_I(sbi)->nr_discards += end - start; -} - -static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) +static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, + bool check_only) { int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); int max_blocks = sbi->blocks_per_seg; @@ -709,16 +1379,19 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) unsigned long *discard_map = (unsigned long *)se->discard_map; unsigned long *dmap = SIT_I(sbi)->tmp_map; unsigned int start = 0, end = -1; - bool force = (cpc->reason == CP_DISCARD); + bool force = (cpc->reason & CP_DISCARD); + struct discard_entry *de = NULL; + struct list_head *head = &SM_I(sbi)->dcc_info->entry_list; int i; if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi)) - return; + return false; if (!force) { if (!test_opt(sbi, DISCARD) || !se->valid_blocks || - SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards) - return; + SM_I(sbi)->dcc_info->nr_discards >= + SM_I(sbi)->dcc_info->max_discards) + return false; } /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ @@ -726,7 +1399,8 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] : (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; - while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { + while (force || SM_I(sbi)->dcc_info->nr_discards <= + SM_I(sbi)->dcc_info->max_discards) { start = __find_rev_next_bit(dmap, max_blocks, end + 1); if (start >= max_blocks) break; @@ -736,13 +1410,27 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) && (end - start) < cpc->trim_minlen) continue; - __add_discard_entry(sbi, cpc, se, start, end); + if (check_only) + return true; + + if (!de) { + de = f2fs_kmem_cache_alloc(discard_entry_slab, + GFP_F2FS_ZERO); + de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start); + list_add_tail(&de->list, head); + } + + for (i = start; i < end; i++) + __set_bit_le(i, (void *)de->discard_map); + + SM_I(sbi)->dcc_info->nr_discards += end - start; } + return false; } void release_discard_addrs(struct f2fs_sb_info *sbi) { - struct list_head *head = &(SM_I(sbi)->discard_list); + struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list); struct discard_entry *entry, *this; /* drop caches */ @@ -768,16 +1456,14 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) { - struct list_head *head = &(SM_I(sbi)->discard_list); + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *head = &dcc->entry_list; struct discard_entry *entry, *this; struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - struct blk_plug plug; unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; unsigned int start = 0, end = -1; unsigned int secno, start_segno; - bool force = (cpc->reason == CP_DISCARD); - - blk_start_plug(&plug); + bool force = (cpc->reason & CP_DISCARD); mutex_lock(&dirty_i->seglist_lock); @@ -794,19 +1480,23 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) dirty_i->nr_dirty[PRE] -= end - start; - if (force || !test_opt(sbi, DISCARD)) + if (!test_opt(sbi, DISCARD)) continue; + if (force && start >= cpc->trim_start && + (end - 1) <= cpc->trim_end) + continue; + if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) { f2fs_issue_discard(sbi, START_BLOCK(sbi, start), (end - start) << sbi->log_blocks_per_seg); continue; } next: - secno = GET_SECNO(sbi, start); - start_segno = secno * sbi->segs_per_sec; + secno = GET_SEC_FROM_SEG(sbi, start); + start_segno = GET_SEG_FROM_SEC(sbi, secno); if (!IS_CURSEC(sbi, secno) && - !get_valid_blocks(sbi, start, sbi->segs_per_sec)) + !get_valid_blocks(sbi, start, true)) f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno), sbi->segs_per_sec << sbi->log_blocks_per_seg); @@ -820,17 +1510,100 @@ next: /* send small discards */ list_for_each_entry_safe(entry, this, head, list) { - if (force && entry->len < cpc->trim_minlen) - goto skip; - f2fs_issue_discard(sbi, entry->blkaddr, entry->len); - cpc->trimmed += entry->len; + unsigned int cur_pos = 0, next_pos, len, total_len = 0; + bool is_valid = test_bit_le(0, entry->discard_map); + +find_next: + if (is_valid) { + next_pos = find_next_zero_bit_le(entry->discard_map, + sbi->blocks_per_seg, cur_pos); + len = next_pos - cur_pos; + + if (f2fs_sb_mounted_blkzoned(sbi->sb) || + (force && len < cpc->trim_minlen)) + goto skip; + + f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos, + len); + cpc->trimmed += len; + total_len += len; + } else { + next_pos = find_next_bit_le(entry->discard_map, + sbi->blocks_per_seg, cur_pos); + } skip: + cur_pos = next_pos; + is_valid = !is_valid; + + if (cur_pos < sbi->blocks_per_seg) + goto find_next; + list_del(&entry->list); - SM_I(sbi)->nr_discards -= entry->len; + dcc->nr_discards -= total_len; kmem_cache_free(discard_entry_slab, entry); } - blk_finish_plug(&plug); + wake_up_discard_thread(sbi, false); +} + +static int create_discard_cmd_control(struct f2fs_sb_info *sbi) +{ + dev_t dev = sbi->sb->s_bdev->bd_dev; + struct discard_cmd_control *dcc; + int err = 0, i; + + if (SM_I(sbi)->dcc_info) { + dcc = SM_I(sbi)->dcc_info; + goto init_thread; + } + + dcc = kzalloc(sizeof(struct discard_cmd_control), GFP_KERNEL); + if (!dcc) + return -ENOMEM; + + dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY; + INIT_LIST_HEAD(&dcc->entry_list); + for (i = 0; i < MAX_PLIST_NUM; i++) { + INIT_LIST_HEAD(&dcc->pend_list[i]); + if (i >= dcc->discard_granularity - 1) + dcc->pend_list_tag[i] |= P_ACTIVE; + } + INIT_LIST_HEAD(&dcc->wait_list); + mutex_init(&dcc->cmd_lock); + atomic_set(&dcc->issued_discard, 0); + atomic_set(&dcc->issing_discard, 0); + atomic_set(&dcc->discard_cmd_cnt, 0); + dcc->nr_discards = 0; + dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg; + dcc->undiscard_blks = 0; + dcc->root = RB_ROOT; + + init_waitqueue_head(&dcc->discard_wait_queue); + SM_I(sbi)->dcc_info = dcc; +init_thread: + dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi, + "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev)); + if (IS_ERR(dcc->f2fs_issue_discard)) { + err = PTR_ERR(dcc->f2fs_issue_discard); + kfree(dcc); + SM_I(sbi)->dcc_info = NULL; + return err; + } + + return err; +} + +static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + + if (!dcc) + return; + + stop_discard_thread(sbi); + + kfree(dcc); + SM_I(sbi)->dcc_info = NULL; } static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) @@ -859,6 +1632,10 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) struct seg_entry *se; unsigned int segno, offset; long int new_vblocks; + bool exist; +#ifdef CONFIG_F2FS_CHECK_FS + bool mir_exist; +#endif segno = GET_SEGNO(sbi, blkaddr); @@ -875,14 +1652,54 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) /* Update valid block bitmap */ if (del > 0) { - if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) + exist = f2fs_test_and_set_bit(offset, se->cur_valid_map); +#ifdef CONFIG_F2FS_CHECK_FS + mir_exist = f2fs_test_and_set_bit(offset, + se->cur_valid_map_mir); + if (unlikely(exist != mir_exist)) { + f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error " + "when setting bitmap, blk:%u, old bit:%d", + blkaddr, exist); f2fs_bug_on(sbi, 1); + } +#endif + if (unlikely(exist)) { + f2fs_msg(sbi->sb, KERN_ERR, + "Bitmap was wrongly set, blk:%u", blkaddr); + f2fs_bug_on(sbi, 1); + se->valid_blocks--; + del = 0; + } + if (f2fs_discard_en(sbi) && !f2fs_test_and_set_bit(offset, se->discard_map)) sbi->discard_blks--; + + /* don't overwrite by SSR to keep node chain */ + if (se->type == CURSEG_WARM_NODE) { + if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) + se->ckpt_valid_blocks++; + } } else { - if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) + exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map); +#ifdef CONFIG_F2FS_CHECK_FS + mir_exist = f2fs_test_and_clear_bit(offset, + se->cur_valid_map_mir); + if (unlikely(exist != mir_exist)) { + f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error " + "when clearing bitmap, blk:%u, old bit:%d", + blkaddr, exist); f2fs_bug_on(sbi, 1); + } +#endif + if (unlikely(!exist)) { + f2fs_msg(sbi->sb, KERN_ERR, + "Bitmap was wrongly cleared, blk:%u", blkaddr); + f2fs_bug_on(sbi, 1); + se->valid_blocks++; + del = 0; + } + if (f2fs_discard_en(sbi) && f2fs_test_and_clear_bit(offset, se->discard_map)) sbi->discard_blks++; @@ -1068,8 +1885,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi, struct free_segmap_info *free_i = FREE_I(sbi); unsigned int segno, secno, zoneno; unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone; - unsigned int hint = *newseg / sbi->segs_per_sec; - unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg); + unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg); + unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg); unsigned int left_start = hint; bool init = true; int go_left = 0; @@ -1079,8 +1896,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi, if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { segno = find_next_zero_bit(free_i->free_segmap, - (hint + 1) * sbi->segs_per_sec, *newseg + 1); - if (segno < (hint + 1) * sbi->segs_per_sec) + GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1); + if (segno < GET_SEG_FROM_SEC(sbi, hint + 1)) goto got_it; } find_other_zone: @@ -1111,8 +1928,8 @@ find_other_zone: secno = left_start; skip_left: hint = secno; - segno = secno * sbi->segs_per_sec; - zoneno = secno / sbi->secs_per_zone; + segno = GET_SEG_FROM_SEC(sbi, secno); + zoneno = GET_ZONE_FROM_SEC(sbi, secno); /* give up on finding another zone */ if (!init) @@ -1156,7 +1973,7 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) struct summary_footer *sum_footer; curseg->segno = curseg->next_segno; - curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno); + curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno); curseg->next_blkoff = 0; curseg->next_segno = NULL_SEGNO; @@ -1169,6 +1986,20 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) __set_sit_entry_type(sbi, type, curseg->segno, modified); } +static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) +{ + /* if segs_per_sec is large than 1, we need to keep original policy. */ + if (sbi->segs_per_sec != 1) + return CURSEG_I(sbi, type)->segno; + + if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) + return 0; + + if (SIT_I(sbi)->last_victim[ALLOC_NEXT]) + return SIT_I(sbi)->last_victim[ALLOC_NEXT]; + return CURSEG_I(sbi, type)->segno; +} + /* * Allocate a current working segment. * This function always allocates a free segment in LFS manner. @@ -1187,6 +2018,7 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) if (test_opt(sbi, NOHEAP)) dir = ALLOC_RIGHT; + segno = __get_next_segno(sbi, type); get_new_segment(sbi, &segno, new_sec, dir); curseg->next_segno = segno; reset_curseg(sbi, type, 1); @@ -1229,7 +2061,7 @@ static void __refresh_next_blkoff(struct f2fs_sb_info *sbi, * This function always allocates a used segment(from dirty seglist) by SSR * manner, so it should recover the existing segment information of valid blocks */ -static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse) +static void change_curseg(struct f2fs_sb_info *sbi, int type) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -1250,28 +2082,53 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse) curseg->alloc_type = SSR; __next_free_blkoff(sbi, curseg, 0); - if (reuse) { - sum_page = get_sum_page(sbi, new_segno); - sum_node = (struct f2fs_summary_block *)page_address(sum_page); - memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); - f2fs_put_page(sum_page, 1); - } + sum_page = get_sum_page(sbi, new_segno); + sum_node = (struct f2fs_summary_block *)page_address(sum_page); + memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); + f2fs_put_page(sum_page, 1); } static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; + unsigned segno = NULL_SEGNO; + int i, cnt; + bool reversed = false; - if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0, 0)) - return v_ops->get_victim(sbi, - &(curseg)->next_segno, BG_GC, type, SSR); + /* need_SSR() already forces to do this */ + if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) { + curseg->next_segno = segno; + return 1; + } - /* For data segments, let's do SSR more intensively */ - for (; type >= CURSEG_HOT_DATA; type--) - if (v_ops->get_victim(sbi, &(curseg)->next_segno, - BG_GC, type, SSR)) + /* For node segments, let's do SSR more intensively */ + if (IS_NODESEG(type)) { + if (type >= CURSEG_WARM_NODE) { + reversed = true; + i = CURSEG_COLD_NODE; + } else { + i = CURSEG_HOT_NODE; + } + cnt = NR_CURSEG_NODE_TYPE; + } else { + if (type >= CURSEG_WARM_DATA) { + reversed = true; + i = CURSEG_COLD_DATA; + } else { + i = CURSEG_HOT_DATA; + } + cnt = NR_CURSEG_DATA_TYPE; + } + + for (; cnt-- > 0; reversed ? i-- : i++) { + if (i == type) + continue; + if (v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) { + curseg->next_segno = segno; return 1; + } + } return 0; } @@ -1286,43 +2143,55 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, if (force) new_curseg(sbi, type, true); - else if (type == CURSEG_WARM_NODE) + else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) && + type == CURSEG_WARM_NODE) new_curseg(sbi, type, false); else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) new_curseg(sbi, type, false); else if (need_SSR(sbi) && get_ssr_segment(sbi, type)) - change_curseg(sbi, type, true); + change_curseg(sbi, type); else new_curseg(sbi, type, false); stat_inc_seg_type(sbi, curseg); } -static void __allocate_new_segments(struct f2fs_sb_info *sbi, int type) -{ - struct curseg_info *curseg = CURSEG_I(sbi, type); - unsigned int old_segno; - - old_segno = curseg->segno; - SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true); - locate_dirty_segment(sbi, old_segno); -} - void allocate_new_segments(struct f2fs_sb_info *sbi) { + struct curseg_info *curseg; + unsigned int old_segno; int i; - if (test_opt(sbi, LFS)) - return; - - for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) - __allocate_new_segments(sbi, i); + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + curseg = CURSEG_I(sbi, i); + old_segno = curseg->segno; + SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); + locate_dirty_segment(sbi, old_segno); + } } static const struct segment_allocation default_salloc_ops = { .allocate_segment = allocate_segment_by_default, }; +bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc) +{ + __u64 trim_start = cpc->trim_start; + bool has_candidate = false; + + mutex_lock(&SIT_I(sbi)->sentry_lock); + for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) { + if (add_discard_addrs(sbi, cpc, true)) { + has_candidate = true; + break; + } + } + mutex_unlock(&SIT_I(sbi)->sentry_lock); + + cpc->trim_start = trim_start; + return has_candidate; +} + int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) { __u64 start = F2FS_BYTES_TO_BLK(range->start); @@ -1373,6 +2242,9 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) schedule(); } + /* It's time to issue all the filed discards */ + mark_discard_range_all(sbi); + f2fs_wait_discard_bios(sbi, false); out: range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); return err; @@ -1386,87 +2258,91 @@ static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) return false; } -static int __get_segment_type_2(struct page *page, enum page_type p_type) +static int __get_segment_type_2(struct f2fs_io_info *fio) { - if (p_type == DATA) + if (fio->type == DATA) return CURSEG_HOT_DATA; else return CURSEG_HOT_NODE; } -static int __get_segment_type_4(struct page *page, enum page_type p_type) +static int __get_segment_type_4(struct f2fs_io_info *fio) { - if (p_type == DATA) { - struct inode *inode = page->mapping->host; + if (fio->type == DATA) { + struct inode *inode = fio->page->mapping->host; if (S_ISDIR(inode->i_mode)) return CURSEG_HOT_DATA; else return CURSEG_COLD_DATA; } else { - if (IS_DNODE(page) && is_cold_node(page)) + if (IS_DNODE(fio->page) && is_cold_node(fio->page)) return CURSEG_WARM_NODE; else return CURSEG_COLD_NODE; } } -static int __get_segment_type_6(struct page *page, enum page_type p_type) +static int __get_segment_type_6(struct f2fs_io_info *fio) { - if (p_type == DATA) { - struct inode *inode = page->mapping->host; + if (fio->type == DATA) { + struct inode *inode = fio->page->mapping->host; - if (S_ISDIR(inode->i_mode)) - return CURSEG_HOT_DATA; - else if (is_cold_data(page) || file_is_cold(inode)) + if (is_cold_data(fio->page) || file_is_cold(inode)) return CURSEG_COLD_DATA; - else - return CURSEG_WARM_DATA; + if (is_inode_flag_set(inode, FI_HOT_DATA)) + return CURSEG_HOT_DATA; + return CURSEG_WARM_DATA; } else { - if (IS_DNODE(page)) - return is_cold_node(page) ? CURSEG_WARM_NODE : + if (IS_DNODE(fio->page)) + return is_cold_node(fio->page) ? CURSEG_WARM_NODE : CURSEG_HOT_NODE; - else - return CURSEG_COLD_NODE; + return CURSEG_COLD_NODE; } } -static int __get_segment_type(struct page *page, enum page_type p_type) +static int __get_segment_type(struct f2fs_io_info *fio) { - switch (F2FS_P_SB(page)->active_logs) { + int type = 0; + + switch (fio->sbi->active_logs) { case 2: - return __get_segment_type_2(page, p_type); + type = __get_segment_type_2(fio); + break; case 4: - return __get_segment_type_4(page, p_type); + type = __get_segment_type_4(fio); + break; + case 6: + type = __get_segment_type_6(fio); + break; + default: + f2fs_bug_on(fio->sbi, true); } - /* NR_CURSEG_TYPE(6) logs by default */ - f2fs_bug_on(F2FS_P_SB(page), - F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE); - return __get_segment_type_6(page, p_type); + + if (IS_HOT(type)) + fio->temp = HOT; + else if (IS_WARM(type)) + fio->temp = WARM; + else + fio->temp = COLD; + return type; } void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, - struct f2fs_summary *sum, int type) + struct f2fs_summary *sum, int type, + struct f2fs_io_info *fio, bool add_list) { struct sit_info *sit_i = SIT_I(sbi); - struct curseg_info *curseg; - bool direct_io = (type == CURSEG_DIRECT_IO); - - type = direct_io ? CURSEG_WARM_DATA : type; - - curseg = CURSEG_I(sbi, type); + struct curseg_info *curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); mutex_lock(&sit_i->sentry_lock); - /* direct_io'ed data is aligned to the segment for better performance */ - if (direct_io && curseg->next_blkoff && - !has_not_enough_free_secs(sbi, 0, 0)) - __allocate_new_segments(sbi, type); - *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); + f2fs_wait_discard_bio(sbi, *new_blkaddr); + /* * __add_sum_entry should be resided under the curseg_mutex * because, this function updates a summary entry in the @@ -1481,54 +2357,72 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, if (!__has_curseg_space(sbi, type)) sit_i->s_ops->allocate_segment(sbi, type, false); /* - * SIT information should be updated before segment allocation, - * since SSR needs latest valid block information. + * SIT information should be updated after segment allocation, + * since we need to keep dirty segments precisely under SSR. */ refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); mutex_unlock(&sit_i->sentry_lock); - if (page && IS_NODESEG(type)) + if (page && IS_NODESEG(type)) { fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); + f2fs_inode_chksum_set(sbi, page); + } + + if (add_list) { + struct f2fs_bio_info *io; + + INIT_LIST_HEAD(&fio->list); + fio->in_list = true; + io = sbi->write_io[fio->type] + fio->temp; + spin_lock(&io->io_lock); + list_add_tail(&fio->list, &io->io_list); + spin_unlock(&io->io_lock); + } + mutex_unlock(&curseg->curseg_mutex); } static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { - int type = __get_segment_type(fio->page, fio->type); - - if (fio->type == NODE || fio->type == DATA) - mutex_lock(&fio->sbi->wio_mutex[fio->type]); + int type = __get_segment_type(fio); + int err; +reallocate: allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, - &fio->new_blkaddr, sum, type); + &fio->new_blkaddr, sum, type, fio, true); /* writeout dirty page into bdev */ - f2fs_submit_page_mbio(fio); - - if (fio->type == NODE || fio->type == DATA) - mutex_unlock(&fio->sbi->wio_mutex[fio->type]); + err = f2fs_submit_page_write(fio); + if (err == -EAGAIN) { + fio->old_blkaddr = fio->new_blkaddr; + goto reallocate; + } } -void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) +void write_meta_page(struct f2fs_sb_info *sbi, struct page *page, + enum iostat_type io_type) { struct f2fs_io_info fio = { .sbi = sbi, .type = META, .op = REQ_OP_WRITE, - .op_flags = WRITE_SYNC | REQ_META | REQ_PRIO, + .op_flags = REQ_SYNC | REQ_META | REQ_PRIO, .old_blkaddr = page->index, .new_blkaddr = page->index, .page = page, .encrypted_page = NULL, + .in_list = false, }; if (unlikely(page->index >= MAIN_BLKADDR(sbi))) fio.op_flags &= ~REQ_META; set_page_writeback(page); - f2fs_submit_page_mbio(&fio); + f2fs_submit_page_write(&fio); + + f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE); } void write_node_page(unsigned int nid, struct f2fs_io_info *fio) @@ -1537,6 +2431,8 @@ void write_node_page(unsigned int nid, struct f2fs_io_info *fio) set_summary(&sum, nid, 0, 0); do_write_page(&sum, fio); + + f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); } void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) @@ -1550,13 +2446,22 @@ void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); do_write_page(&sum, fio); f2fs_update_data_blkaddr(dn, fio->new_blkaddr); + + f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE); } -void rewrite_data_page(struct f2fs_io_info *fio) +int rewrite_data_page(struct f2fs_io_info *fio) { + int err; + fio->new_blkaddr = fio->old_blkaddr; stat_inc_inplace_blocks(fio->sbi); - f2fs_submit_page_mbio(fio); + + err = f2fs_submit_page_bio(fio); + + f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); + + return err; } void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, @@ -1598,7 +2503,7 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, /* change the current segment */ if (segno != curseg->segno) { curseg->next_segno = segno; - change_curseg(sbi, type, true); + change_curseg(sbi, type); } curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr); @@ -1617,7 +2522,7 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (recover_curseg) { if (old_cursegno != curseg->segno) { curseg->next_segno = old_cursegno; - change_curseg(sbi, type, true); + change_curseg(sbi, type); } curseg->next_blkoff = old_blkoff; } @@ -1647,7 +2552,8 @@ void f2fs_wait_on_page_writeback(struct page *page, if (PageWriteback(page)) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); - f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, type, WRITE); + f2fs_submit_merged_write_cond(sbi, page->mapping->host, + 0, page->index, type); if (ordered) wait_on_page_writeback(page); else @@ -1655,8 +2561,7 @@ void f2fs_wait_on_page_writeback(struct page *page, } } -void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi, - block_t blkaddr) +void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr) { struct page *cpage; @@ -2127,9 +3032,9 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) se = get_seg_entry(sbi, segno); /* add discard candidates */ - if (cpc->reason != CP_DISCARD) { + if (!(cpc->reason & CP_DISCARD)) { cpc->trim_start = segno; - add_discard_addrs(sbi, cpc); + add_discard_addrs(sbi, cpc, false); } if (to_journal) { @@ -2163,9 +3068,13 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_bug_on(sbi, !list_empty(head)); f2fs_bug_on(sbi, sit_i->dirty_sentries); out: - if (cpc->reason == CP_DISCARD) { + if (cpc->reason & CP_DISCARD) { + __u64 trim_start = cpc->trim_start; + for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) - add_discard_addrs(sbi, cpc); + add_discard_addrs(sbi, cpc, false); + + cpc->trim_start = trim_start; } mutex_unlock(&sit_i->sentry_lock); @@ -2175,10 +3084,9 @@ out: static int build_sit_info(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); - struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct sit_info *sit_i; unsigned int sit_segs, start; - char *src_bitmap, *dst_bitmap; + char *src_bitmap; unsigned int bitmap_size; /* allocate memory for SIT information */ @@ -2188,13 +3096,13 @@ static int build_sit_info(struct f2fs_sb_info *sbi) SM_I(sbi)->sit_info = sit_i; - sit_i->sentries = f2fs_kvzalloc(MAIN_SEGS(sbi) * + sit_i->sentries = kvzalloc(MAIN_SEGS(sbi) * sizeof(struct seg_entry), GFP_KERNEL); if (!sit_i->sentries) return -ENOMEM; bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); - sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL); + sit_i->dirty_sentries_bitmap = kvzalloc(bitmap_size, GFP_KERNEL); if (!sit_i->dirty_sentries_bitmap) return -ENOMEM; @@ -2207,6 +3115,13 @@ static int build_sit_info(struct f2fs_sb_info *sbi) !sit_i->sentries[start].ckpt_valid_map) return -ENOMEM; +#ifdef CONFIG_F2FS_CHECK_FS + sit_i->sentries[start].cur_valid_map_mir + = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + if (!sit_i->sentries[start].cur_valid_map_mir) + return -ENOMEM; +#endif + if (f2fs_discard_en(sbi)) { sit_i->sentries[start].discard_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); @@ -2220,7 +3135,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) return -ENOMEM; if (sbi->segs_per_sec > 1) { - sit_i->sec_entries = f2fs_kvzalloc(MAIN_SECS(sbi) * + sit_i->sec_entries = kvzalloc(MAIN_SECS(sbi) * sizeof(struct sec_entry), GFP_KERNEL); if (!sit_i->sec_entries) return -ENOMEM; @@ -2233,22 +3148,27 @@ static int build_sit_info(struct f2fs_sb_info *sbi) bitmap_size = __bitmap_size(sbi, SIT_BITMAP); src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP); - dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL); - if (!dst_bitmap) + sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL); + if (!sit_i->sit_bitmap) return -ENOMEM; +#ifdef CONFIG_F2FS_CHECK_FS + sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL); + if (!sit_i->sit_bitmap_mir) + return -ENOMEM; +#endif + /* init SIT information */ sit_i->s_ops = &default_salloc_ops; sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr); sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg; - sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count); - sit_i->sit_bitmap = dst_bitmap; + sit_i->written_valid_blocks = 0; sit_i->bitmap_size = bitmap_size; sit_i->dirty_sentries = 0; sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time); - sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec; + sit_i->mounted_time = ktime_get_real_seconds(); mutex_init(&sit_i->sentry_lock); return 0; } @@ -2266,12 +3186,12 @@ static int build_free_segmap(struct f2fs_sb_info *sbi) SM_I(sbi)->free_info = free_i; bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); - free_i->free_segmap = f2fs_kvmalloc(bitmap_size, GFP_KERNEL); + free_i->free_segmap = kvmalloc(bitmap_size, GFP_KERNEL); if (!free_i->free_segmap) return -ENOMEM; sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); - free_i->free_secmap = f2fs_kvmalloc(sec_bitmap_size, GFP_KERNEL); + free_i->free_secmap = kvmalloc(sec_bitmap_size, GFP_KERNEL); if (!free_i->free_secmap) return -ENOMEM; @@ -2324,10 +3244,10 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) int sit_blk_cnt = SIT_BLK_CNT(sbi); unsigned int i, start, end; unsigned int readed, start_blk = 0; - int nrpages = MAX_BIO_BLOCKS(sbi) * 8; do { - readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT, true); + readed = ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES, + META_SIT, true); start = start_blk * sit_i->sents_per_block; end = (start_blk + readed) * sit_i->sents_per_block; @@ -2347,10 +3267,17 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) /* build discard map only one time */ if (f2fs_discard_en(sbi)) { - memcpy(se->discard_map, se->cur_valid_map, - SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += sbi->blocks_per_seg - - se->valid_blocks; + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { + memset(se->discard_map, 0xff, + SIT_VBLOCK_MAP_SIZE); + } else { + memcpy(se->discard_map, + se->cur_valid_map, + SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += + sbi->blocks_per_seg - + se->valid_blocks; + } } if (sbi->segs_per_sec > 1) @@ -2374,10 +3301,15 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) seg_info_from_raw_sit(se, &sit); if (f2fs_discard_en(sbi)) { - memcpy(se->discard_map, se->cur_valid_map, - SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += old_valid_blocks - - se->valid_blocks; + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { + memset(se->discard_map, 0xff, + SIT_VBLOCK_MAP_SIZE); + } else { + memcpy(se->discard_map, se->cur_valid_map, + SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += old_valid_blocks - + se->valid_blocks; + } } if (sbi->segs_per_sec > 1) @@ -2396,6 +3328,9 @@ static void init_free_segmap(struct f2fs_sb_info *sbi) struct seg_entry *sentry = get_seg_entry(sbi, start); if (!sentry->valid_blocks) __set_free(sbi, start); + else + SIT_I(sbi)->written_valid_blocks += + sentry->valid_blocks; } /* set use the current segments */ @@ -2418,7 +3353,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) if (segno >= MAIN_SEGS(sbi)) break; offset = segno + 1; - valid_blocks = get_valid_blocks(sbi, segno, 0); + valid_blocks = get_valid_blocks(sbi, segno, false); if (valid_blocks == sbi->blocks_per_seg || !valid_blocks) continue; if (valid_blocks > sbi->blocks_per_seg) { @@ -2436,7 +3371,7 @@ static int init_victim_secmap(struct f2fs_sb_info *sbi) struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); - dirty_i->victim_secmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL); + dirty_i->victim_secmap = kvzalloc(bitmap_size, GFP_KERNEL); if (!dirty_i->victim_secmap) return -ENOMEM; return 0; @@ -2458,7 +3393,7 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi) bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); for (i = 0; i < NR_DIRTY_TYPE; i++) { - dirty_i->dirty_segmap[i] = f2fs_kvzalloc(bitmap_size, GFP_KERNEL); + dirty_i->dirty_segmap[i] = kvzalloc(bitmap_size, GFP_KERNEL); if (!dirty_i->dirty_segmap[i]) return -ENOMEM; } @@ -2524,22 +3459,22 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; - - INIT_LIST_HEAD(&sm_info->discard_list); - INIT_LIST_HEAD(&sm_info->wait_list); - sm_info->nr_discards = 0; - sm_info->max_discards = 0; + sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS; sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS; INIT_LIST_HEAD(&sm_info->sit_entry_set); - if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { + if (!f2fs_readonly(sbi->sb)) { err = create_flush_cmd_control(sbi); if (err) return err; } + err = create_discard_cmd_control(sbi); + if (err) + return err; + err = build_sit_info(sbi); if (err) return err; @@ -2633,6 +3568,9 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) if (sit_i->sentries) { for (start = 0; start < MAIN_SEGS(sbi); start++) { kfree(sit_i->sentries[start].cur_valid_map); +#ifdef CONFIG_F2FS_CHECK_FS + kfree(sit_i->sentries[start].cur_valid_map_mir); +#endif kfree(sit_i->sentries[start].ckpt_valid_map); kfree(sit_i->sentries[start].discard_map); } @@ -2645,6 +3583,9 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) SM_I(sbi)->sit_info = NULL; kfree(sit_i->sit_bitmap); +#ifdef CONFIG_F2FS_CHECK_FS + kfree(sit_i->sit_bitmap_mir); +#endif kfree(sit_i); } @@ -2654,7 +3595,8 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi) if (!sm_info) return; - destroy_flush_cmd_control(sbi); + destroy_flush_cmd_control(sbi, true); + destroy_discard_cmd_control(sbi); destroy_dirty_segmap(sbi); destroy_curseg(sbi); destroy_free_segmap(sbi); @@ -2670,15 +3612,15 @@ int __init create_segment_manager_caches(void) if (!discard_entry_slab) goto fail; - bio_entry_slab = f2fs_kmem_cache_create("bio_entry", - sizeof(struct bio_entry)); - if (!bio_entry_slab) + discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd", + sizeof(struct discard_cmd)); + if (!discard_cmd_slab) goto destroy_discard_entry; sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set", sizeof(struct sit_entry_set)); if (!sit_entry_set_slab) - goto destroy_bio_entry; + goto destroy_discard_cmd; inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry", sizeof(struct inmem_pages)); @@ -2688,8 +3630,8 @@ int __init create_segment_manager_caches(void) destroy_sit_entry_set: kmem_cache_destroy(sit_entry_set_slab); -destroy_bio_entry: - kmem_cache_destroy(bio_entry_slab); +destroy_discard_cmd: + kmem_cache_destroy(discard_cmd_slab); destroy_discard_entry: kmem_cache_destroy(discard_entry_slab); fail: @@ -2699,7 +3641,7 @@ fail: void destroy_segment_manager_caches(void) { kmem_cache_destroy(sit_entry_set_slab); - kmem_cache_destroy(bio_entry_slab); + kmem_cache_destroy(discard_cmd_slab); kmem_cache_destroy(discard_entry_slab); kmem_cache_destroy(inmem_entry_slab); } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index b164f8339281..e0a6cc23ace3 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -18,79 +18,91 @@ #define DEF_RECLAIM_PREFREE_SEGMENTS 5 /* 5% over total segments */ #define DEF_MAX_RECLAIM_PREFREE_SEGMENTS 4096 /* 8GB in maximum */ -/* L: Logical segment # in volume, R: Relative segment # in main area */ -#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) -#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno) +#define F2FS_MIN_SEGMENTS 9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */ -#define IS_DATASEG(t) (t <= CURSEG_COLD_DATA) -#define IS_NODESEG(t) (t >= CURSEG_HOT_NODE) +/* L: Logical segment # in volume, R: Relative segment # in main area */ +#define GET_L2R_SEGNO(free_i, segno) ((segno) - (free_i)->start_segno) +#define GET_R2L_SEGNO(free_i, segno) ((segno) + (free_i)->start_segno) + +#define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA) +#define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE) + +#define IS_HOT(t) ((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA) +#define IS_WARM(t) ((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA) +#define IS_COLD(t) ((t) == CURSEG_COLD_NODE || (t) == CURSEG_COLD_DATA) #define IS_CURSEG(sbi, seg) \ - ((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ - (seg == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ - (seg == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \ - (seg == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \ - (seg == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \ - (seg == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno)) + (((seg) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ + ((seg) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ + ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \ + ((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \ + ((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \ + ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno)) #define IS_CURSEC(sbi, secno) \ - ((secno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \ - sbi->segs_per_sec) || \ - (secno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \ - sbi->segs_per_sec) || \ - (secno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \ - sbi->segs_per_sec) || \ - (secno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \ - sbi->segs_per_sec) || \ - (secno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \ - sbi->segs_per_sec) || \ - (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ - sbi->segs_per_sec)) \ + (((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \ + (sbi)->segs_per_sec) || \ + ((secno) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \ + (sbi)->segs_per_sec) || \ + ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \ + (sbi)->segs_per_sec) || \ + ((secno) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \ + (sbi)->segs_per_sec) || \ + ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \ + (sbi)->segs_per_sec) || \ + ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ + (sbi)->segs_per_sec)) \ #define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr) #define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr) #define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments) -#define MAIN_SECS(sbi) (sbi->total_sections) +#define MAIN_SECS(sbi) ((sbi)->total_sections) #define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count) -#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << sbi->log_blocks_per_seg) +#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << (sbi)->log_blocks_per_seg) #define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi)) -#define SEGMENT_SIZE(sbi) (1ULL << (sbi->log_blocksize + \ - sbi->log_blocks_per_seg)) +#define SEGMENT_SIZE(sbi) (1ULL << ((sbi)->log_blocksize + \ + (sbi)->log_blocks_per_seg)) #define START_BLOCK(sbi, segno) (SEG0_BLKADDR(sbi) + \ - (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg)) + (GET_R2L_SEGNO(FREE_I(sbi), segno) << (sbi)->log_blocks_per_seg)) #define NEXT_FREE_BLKADDR(sbi, curseg) \ - (START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff) + (START_BLOCK(sbi, (curseg)->segno) + (curseg)->next_blkoff) #define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) ((blk_addr) - SEG0_BLKADDR(sbi)) #define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ - (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) + (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> (sbi)->log_blocks_per_seg) #define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \ - (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1)) + (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1)) #define GET_SEGNO(sbi, blk_addr) \ - (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \ + ((((blk_addr) == NULL_ADDR) || ((blk_addr) == NEW_ADDR)) ? \ NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ GET_SEGNO_FROM_SEG0(sbi, blk_addr))) -#define GET_SECNO(sbi, segno) \ - ((segno) / sbi->segs_per_sec) -#define GET_ZONENO_FROM_SEGNO(sbi, segno) \ - ((segno / sbi->segs_per_sec) / sbi->secs_per_zone) +#define BLKS_PER_SEC(sbi) \ + ((sbi)->segs_per_sec * (sbi)->blocks_per_seg) +#define GET_SEC_FROM_SEG(sbi, segno) \ + ((segno) / (sbi)->segs_per_sec) +#define GET_SEG_FROM_SEC(sbi, secno) \ + ((secno) * (sbi)->segs_per_sec) +#define GET_ZONE_FROM_SEC(sbi, secno) \ + ((secno) / (sbi)->secs_per_zone) +#define GET_ZONE_FROM_SEG(sbi, segno) \ + GET_ZONE_FROM_SEC(sbi, GET_SEC_FROM_SEG(sbi, segno)) #define GET_SUM_BLOCK(sbi, segno) \ - ((sbi->sm_info->ssa_blkaddr) + segno) + ((sbi)->sm_info->ssa_blkaddr + (segno)) #define GET_SUM_TYPE(footer) ((footer)->entry_type) -#define SET_SUM_TYPE(footer, type) ((footer)->entry_type = type) +#define SET_SUM_TYPE(footer, type) ((footer)->entry_type = (type)) #define SIT_ENTRY_OFFSET(sit_i, segno) \ - (segno % sit_i->sents_per_block) + ((segno) % (sit_i)->sents_per_block) #define SIT_BLOCK_OFFSET(segno) \ - (segno / SIT_ENTRY_PER_BLOCK) + ((segno) / SIT_ENTRY_PER_BLOCK) #define START_SEGNO(segno) \ (SIT_BLOCK_OFFSET(segno) * SIT_ENTRY_PER_BLOCK) #define SIT_BLK_CNT(sbi) \ @@ -101,9 +113,7 @@ #define SECTOR_FROM_BLOCK(blk_addr) \ (((sector_t)blk_addr) << F2FS_LOG_SECTORS_PER_BLOCK) #define SECTOR_TO_BLOCK(sectors) \ - (sectors >> F2FS_LOG_SECTORS_PER_BLOCK) -#define MAX_BIO_BLOCKS(sbi) \ - ((int)min((int)max_hw_blocks(sbi), BIO_MAX_PAGES)) + ((sectors) >> F2FS_LOG_SECTORS_PER_BLOCK) /* * indicate a block allocation direction: RIGHT and LEFT. @@ -132,7 +142,10 @@ enum { */ enum { GC_CB = 0, - GC_GREEDY + GC_GREEDY, + ALLOC_NEXT, + FLUSH_DEVICE, + MAX_GC_POLICY, }; /* @@ -164,6 +177,9 @@ struct seg_entry { unsigned int ckpt_valid_blocks:10; /* # of valid blocks last cp */ unsigned int padding:6; /* padding */ unsigned char *cur_valid_map; /* validity bitmap of blocks */ +#ifdef CONFIG_F2FS_CHECK_FS + unsigned char *cur_valid_map_mir; /* mirror of current valid bitmap */ +#endif /* * # of valid blocks and the validity bitmap stored in the the last * checkpoint pack. This information is used by the SSR mode. @@ -186,9 +202,12 @@ struct segment_allocation { * the page is atomically written, and it is in inmem_pages list. */ #define ATOMIC_WRITTEN_PAGE ((unsigned long)-1) +#define DUMMY_WRITTEN_PAGE ((unsigned long)-2) #define IS_ATOMIC_WRITTEN_PAGE(page) \ (page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE) +#define IS_DUMMY_WRITTEN_PAGE(page) \ + (page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE) struct inmem_pages { struct list_head list; @@ -203,6 +222,9 @@ struct sit_info { block_t sit_blocks; /* # of blocks used by SIT area */ block_t written_valid_blocks; /* # of valid blocks in main area */ char *sit_bitmap; /* SIT bitmap pointer */ +#ifdef CONFIG_F2FS_CHECK_FS + char *sit_bitmap_mir; /* SIT bitmap mirror */ +#endif unsigned int bitmap_size; /* SIT bitmap size */ unsigned long *tmp_map; /* bitmap for temporal use */ @@ -218,6 +240,8 @@ struct sit_info { unsigned long long mounted_time; /* mount time */ unsigned long long min_mtime; /* min. modification time */ unsigned long long max_mtime; /* max. modification time */ + + unsigned int last_victim[MAX_GC_POLICY]; /* last victim segment # */ }; struct free_segmap_info { @@ -294,17 +318,17 @@ static inline struct sec_entry *get_sec_entry(struct f2fs_sb_info *sbi, unsigned int segno) { struct sit_info *sit_i = SIT_I(sbi); - return &sit_i->sec_entries[GET_SECNO(sbi, segno)]; + return &sit_i->sec_entries[GET_SEC_FROM_SEG(sbi, segno)]; } static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi, - unsigned int segno, int section) + unsigned int segno, bool use_section) { /* * In order to get # of valid blocks in a section instantly from many * segments, f2fs manages two counting structures separately. */ - if (section > 1) + if (use_section && sbi->segs_per_sec > 1) return get_sec_entry(sbi, segno)->valid_blocks; else return get_seg_entry(sbi, segno)->valid_blocks; @@ -317,6 +341,9 @@ static inline void seg_info_from_raw_sit(struct seg_entry *se, se->ckpt_valid_blocks = GET_SIT_VBLOCKS(rs); memcpy(se->cur_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); +#ifdef CONFIG_F2FS_CHECK_FS + memcpy(se->cur_valid_map_mir, rs->valid_map, SIT_VBLOCK_MAP_SIZE); +#endif se->type = GET_SIT_TYPE(rs); se->mtime = le64_to_cpu(rs->mtime); } @@ -346,8 +373,8 @@ static inline unsigned int find_next_inuse(struct free_segmap_info *free_i, static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) { struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int secno = segno / sbi->segs_per_sec; - unsigned int start_segno = secno * sbi->segs_per_sec; + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; spin_lock(&free_i->segmap_lock); @@ -367,7 +394,8 @@ static inline void __set_inuse(struct f2fs_sb_info *sbi, unsigned int segno) { struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int secno = segno / sbi->segs_per_sec; + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + set_bit(segno, free_i->free_segmap); free_i->free_segments--; if (!test_and_set_bit(secno, free_i->free_secmap)) @@ -378,8 +406,8 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, unsigned int segno) { struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int secno = segno / sbi->segs_per_sec; - unsigned int start_segno = secno * sbi->segs_per_sec; + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; spin_lock(&free_i->segmap_lock); @@ -400,7 +428,8 @@ static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi, unsigned int segno) { struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int secno = segno / sbi->segs_per_sec; + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + spin_lock(&free_i->segmap_lock); if (!test_and_set_bit(segno, free_i->free_segmap)) { free_i->free_segments--; @@ -414,6 +443,12 @@ static inline void get_sit_bitmap(struct f2fs_sb_info *sbi, void *dst_addr) { struct sit_info *sit_i = SIT_I(sbi); + +#ifdef CONFIG_F2FS_CHECK_FS + if (memcmp(sit_i->sit_bitmap, sit_i->sit_bitmap_mir, + sit_i->bitmap_size)) + f2fs_bug_on(sbi, 1); +#endif memcpy(dst_addr, sit_i->sit_bitmap, sit_i->bitmap_size); } @@ -457,26 +492,9 @@ static inline int overprovision_segments(struct f2fs_sb_info *sbi) return SM_I(sbi)->ovp_segments; } -static inline int overprovision_sections(struct f2fs_sb_info *sbi) -{ - return ((unsigned int) overprovision_segments(sbi)) / sbi->segs_per_sec; -} - static inline int reserved_sections(struct f2fs_sb_info *sbi) { - return ((unsigned int) reserved_segments(sbi)) / sbi->segs_per_sec; -} - -static inline bool need_SSR(struct f2fs_sb_info *sbi) -{ - int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); - int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); - - if (test_opt(sbi, LFS)) - return false; - - return free_sections(sbi) <= (node_secs + 2 * dent_secs + - reserved_sections(sbi) + 1); + return GET_SEC_FROM_SEG(sbi, (unsigned int)reserved_segments(sbi)); } static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, @@ -484,14 +502,14 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, { int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); - - node_secs += get_blocktype_secs(sbi, F2FS_DIRTY_IMETA); + int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA); if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) return false; return (free_sections(sbi) + freed) <= - (node_secs + 2 * dent_secs + reserved_sections(sbi) + needed); + (node_secs + 2 * dent_secs + imeta_secs + + reserved_sections(sbi) + needed); } static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi) @@ -521,6 +539,7 @@ static inline int utilization(struct f2fs_sb_info *sbi) */ #define DEF_MIN_IPU_UTIL 70 #define DEF_MIN_FSYNC_BLOCKS 8 +#define DEF_MIN_HOT_BLOCKS 16 enum { F2FS_IPU_FORCE, @@ -528,20 +547,22 @@ enum { F2FS_IPU_UTIL, F2FS_IPU_SSR_UTIL, F2FS_IPU_FSYNC, + F2FS_IPU_ASYNC, }; -static inline bool need_inplace_update(struct inode *inode) +static inline bool need_inplace_update_policy(struct inode *inode, + struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned int policy = SM_I(sbi)->ipu_policy; - /* IPU can be done only for the user data */ - if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode)) - return false; - if (test_opt(sbi, LFS)) return false; + /* if this is cold file, we should overwrite to avoid fragmentation */ + if (file_is_cold(inode)) + return true; + if (policy & (0x1 << F2FS_IPU_FORCE)) return true; if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi)) @@ -553,6 +574,15 @@ static inline bool need_inplace_update(struct inode *inode) utilization(sbi) > SM_I(sbi)->min_ipu_util) return true; + /* + * IPU for rewrite async pages + */ + if (policy & (0x1 << F2FS_IPU_ASYNC) && + fio && fio->op == REQ_OP_WRITE && + !(fio->op_flags & REQ_SYNC) && + !f2fs_encrypted_inode(inode)) + return true; + /* this is only set during fdatasync */ if (policy & (0x1 << F2FS_IPU_FSYNC) && is_inode_flag_set(inode, FI_NEED_IPU)) @@ -633,6 +663,12 @@ static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, check_seg_range(sbi, start); +#ifdef CONFIG_F2FS_CHECK_FS + if (f2fs_test_bit(offset, sit_i->sit_bitmap) != + f2fs_test_bit(offset, sit_i->sit_bitmap_mir)) + f2fs_bug_on(sbi, 1); +#endif + /* calculate sit block address */ if (f2fs_test_bit(offset, sit_i->sit_bitmap)) blk_addr += sit_i->sit_blocks; @@ -658,13 +694,17 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) unsigned int block_off = SIT_BLOCK_OFFSET(start); f2fs_change_bit(block_off, sit_i->sit_bitmap); +#ifdef CONFIG_F2FS_CHECK_FS + f2fs_change_bit(block_off, sit_i->sit_bitmap_mir); +#endif } static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi) { struct sit_info *sit_i = SIT_I(sbi); - return sit_i->elapsed_time + CURRENT_TIME_SEC.tv_sec - - sit_i->mounted_time; + time64_t now = ktime_get_real_seconds(); + + return sit_i->elapsed_time + now - sit_i->mounted_time; } static inline void set_summary(struct f2fs_summary *sum, nid_t nid, @@ -691,7 +731,7 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi, unsigned int secno) { - if (get_valid_blocks(sbi, secno, sbi->segs_per_sec) >= + if (get_valid_blocks(sbi, GET_SEG_FROM_SEC(sbi, secno), true) >= sbi->fggc_threshold) return true; return false; @@ -704,19 +744,12 @@ static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno) return false; } -static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi) -{ - struct block_device *bdev = sbi->sb->s_bdev; - struct request_queue *q = bdev_get_queue(bdev); - return SECTOR_TO_BLOCK(queue_max_sectors(q)); -} - /* * It is very important to gather dirty pages and write at once, so that we can * submit a big bio without interfering other data writes. * By default, 512 pages for directory data, - * 512 pages (2MB) * 3 for three types of nodes, and - * max_bio_blocks for meta are set. + * 512 pages (2MB) * 8 for nodes, and + * 256 pages * 8 for meta are set. */ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) { @@ -728,7 +761,7 @@ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) else if (type == NODE) return 8 * sbi->blocks_per_seg; else if (type == META) - return 8 * MAX_BIO_BLOCKS(sbi); + return 8 * BIO_MAX_PAGES; else return 0; } @@ -745,12 +778,35 @@ static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type, return 0; nr_to_write = wbc->nr_to_write; - + desired = BIO_MAX_PAGES; if (type == NODE) - desired = 2 * max_hw_blocks(sbi); - else - desired = MAX_BIO_BLOCKS(sbi); + desired <<= 1; wbc->nr_to_write = desired; return desired - nr_to_write; } + +static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + bool wakeup = false; + int i; + + if (force) + goto wake_up; + + mutex_lock(&dcc->cmd_lock); + for (i = MAX_PLIST_NUM - 1; + i >= 0 && plist_issue(dcc->pend_list_tag[i]); i--) { + if (!list_empty(&dcc->pend_list[i])) { + wakeup = true; + break; + } + } + mutex_unlock(&dcc->cmd_lock); + if (!wakeup) + return; +wake_up: + dcc->discard_wake = 1; + wake_up_interruptible_all(&dcc->discard_wait_queue); +} diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index 46c915425923..5c60fc28ec75 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -21,14 +21,16 @@ static unsigned int shrinker_run_no; static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi) { - return NM_I(sbi)->nat_cnt - NM_I(sbi)->dirty_nat_cnt; + long count = NM_I(sbi)->nat_cnt - NM_I(sbi)->dirty_nat_cnt; + + return count > 0 ? count : 0; } static unsigned long __count_free_nids(struct f2fs_sb_info *sbi) { - if (NM_I(sbi)->fcnt > MAX_FREE_NIDS) - return NM_I(sbi)->fcnt - MAX_FREE_NIDS; - return 0; + long count = NM_I(sbi)->nid_cnt[FREE_NID_LIST] - MAX_FREE_NIDS; + + return count > 0 ? count : 0; } static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index b81998e9e91d..da59996a859f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -22,8 +22,10 @@ #include #include #include +#include #include #include +#include #include "f2fs.h" #include "node.h" @@ -35,9 +37,7 @@ #define CREATE_TRACE_POINTS #include -static struct proc_dir_entry *f2fs_proc_root; static struct kmem_cache *f2fs_inode_cachep; -static struct kset *f2fs_kset; #ifdef CONFIG_F2FS_FAULT_INJECTION @@ -49,6 +49,7 @@ char *fault_name[FAULT_MAX] = { [FAULT_BLOCK] = "no more block", [FAULT_DIR_DEPTH] = "too big dir depth", [FAULT_EVICT_INODE] = "evict_inode fail", + [FAULT_TRUNCATE] = "truncate fail", [FAULT_IO] = "IO error", [FAULT_CHECKPOINT] = "checkpoint error", }; @@ -82,6 +83,7 @@ enum { Opt_discard, Opt_nodiscard, Opt_noheap, + Opt_heap, Opt_user_xattr, Opt_nouser_xattr, Opt_acl, @@ -89,6 +91,7 @@ enum { Opt_active_logs, Opt_disable_ext_identify, Opt_inline_xattr, + Opt_noinline_xattr, Opt_inline_data, Opt_inline_dentry, Opt_noinline_dentry, @@ -101,9 +104,24 @@ enum { Opt_noinline_data, Opt_data_flush, Opt_mode, + Opt_io_size_bits, Opt_fault_injection, Opt_lazytime, Opt_nolazytime, + Opt_quota, + Opt_noquota, + Opt_usrquota, + Opt_grpquota, + Opt_prjquota, + Opt_usrjquota, + Opt_grpjquota, + Opt_prjjquota, + Opt_offusrjquota, + Opt_offgrpjquota, + Opt_offprjjquota, + Opt_jqfmt_vfsold, + Opt_jqfmt_vfsv0, + Opt_jqfmt_vfsv1, Opt_err, }; @@ -114,6 +132,7 @@ static match_table_t f2fs_tokens = { {Opt_discard, "discard"}, {Opt_nodiscard, "nodiscard"}, {Opt_noheap, "no_heap"}, + {Opt_heap, "heap"}, {Opt_user_xattr, "user_xattr"}, {Opt_nouser_xattr, "nouser_xattr"}, {Opt_acl, "acl"}, @@ -121,6 +140,7 @@ static match_table_t f2fs_tokens = { {Opt_active_logs, "active_logs=%u"}, {Opt_disable_ext_identify, "disable_ext_identify"}, {Opt_inline_xattr, "inline_xattr"}, + {Opt_noinline_xattr, "noinline_xattr"}, {Opt_inline_data, "inline_data"}, {Opt_inline_dentry, "inline_dentry"}, {Opt_noinline_dentry, "noinline_dentry"}, @@ -133,210 +153,27 @@ static match_table_t f2fs_tokens = { {Opt_noinline_data, "noinline_data"}, {Opt_data_flush, "data_flush"}, {Opt_mode, "mode=%s"}, + {Opt_io_size_bits, "io_bits=%u"}, {Opt_fault_injection, "fault_injection=%u"}, {Opt_lazytime, "lazytime"}, {Opt_nolazytime, "nolazytime"}, + {Opt_quota, "quota"}, + {Opt_noquota, "noquota"}, + {Opt_usrquota, "usrquota"}, + {Opt_grpquota, "grpquota"}, + {Opt_prjquota, "prjquota"}, + {Opt_usrjquota, "usrjquota=%s"}, + {Opt_grpjquota, "grpjquota=%s"}, + {Opt_prjjquota, "prjjquota=%s"}, + {Opt_offusrjquota, "usrjquota="}, + {Opt_offgrpjquota, "grpjquota="}, + {Opt_offprjjquota, "prjjquota="}, + {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, + {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, + {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, {Opt_err, NULL}, }; -/* Sysfs support for f2fs */ -enum { - GC_THREAD, /* struct f2fs_gc_thread */ - SM_INFO, /* struct f2fs_sm_info */ - NM_INFO, /* struct f2fs_nm_info */ - F2FS_SBI, /* struct f2fs_sb_info */ -#ifdef CONFIG_F2FS_FAULT_INJECTION - FAULT_INFO_RATE, /* struct f2fs_fault_info */ - FAULT_INFO_TYPE, /* struct f2fs_fault_info */ -#endif -}; - -struct f2fs_attr { - struct attribute attr; - ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *); - ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *, - const char *, size_t); - int struct_type; - int offset; -}; - -static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) -{ - if (struct_type == GC_THREAD) - return (unsigned char *)sbi->gc_thread; - else if (struct_type == SM_INFO) - return (unsigned char *)SM_I(sbi); - else if (struct_type == NM_INFO) - return (unsigned char *)NM_I(sbi); - else if (struct_type == F2FS_SBI) - return (unsigned char *)sbi; -#ifdef CONFIG_F2FS_FAULT_INJECTION - else if (struct_type == FAULT_INFO_RATE || - struct_type == FAULT_INFO_TYPE) - return (unsigned char *)&sbi->fault_info; -#endif - return NULL; -} - -static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, - struct f2fs_sb_info *sbi, char *buf) -{ - struct super_block *sb = sbi->sb; - - if (!sb->s_bdev->bd_part) - return snprintf(buf, PAGE_SIZE, "0\n"); - - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)(sbi->kbytes_written + - BD_PART_WRITTEN(sbi))); -} - -static ssize_t f2fs_sbi_show(struct f2fs_attr *a, - struct f2fs_sb_info *sbi, char *buf) -{ - unsigned char *ptr = NULL; - unsigned int *ui; - - ptr = __struct_ptr(sbi, a->struct_type); - if (!ptr) - return -EINVAL; - - ui = (unsigned int *)(ptr + a->offset); - - return snprintf(buf, PAGE_SIZE, "%u\n", *ui); -} - -static ssize_t f2fs_sbi_store(struct f2fs_attr *a, - struct f2fs_sb_info *sbi, - const char *buf, size_t count) -{ - unsigned char *ptr; - unsigned long t; - unsigned int *ui; - ssize_t ret; - - ptr = __struct_ptr(sbi, a->struct_type); - if (!ptr) - return -EINVAL; - - ui = (unsigned int *)(ptr + a->offset); - - ret = kstrtoul(skip_spaces(buf), 0, &t); - if (ret < 0) - return ret; -#ifdef CONFIG_F2FS_FAULT_INJECTION - if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX)) - return -EINVAL; -#endif - *ui = t; - return count; -} - -static ssize_t f2fs_attr_show(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, - s_kobj); - struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); - - return a->show ? a->show(a, sbi, buf) : 0; -} - -static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t len) -{ - struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, - s_kobj); - struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); - - return a->store ? a->store(a, sbi, buf, len) : 0; -} - -static void f2fs_sb_release(struct kobject *kobj) -{ - struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, - s_kobj); - complete(&sbi->s_kobj_unregister); -} - -#define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \ -static struct f2fs_attr f2fs_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .show = _show, \ - .store = _store, \ - .struct_type = _struct_type, \ - .offset = _offset \ -} - -#define F2FS_RW_ATTR(struct_type, struct_name, name, elname) \ - F2FS_ATTR_OFFSET(struct_type, name, 0644, \ - f2fs_sbi_show, f2fs_sbi_store, \ - offsetof(struct struct_name, elname)) - -#define F2FS_GENERAL_RO_ATTR(name) \ -static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL) - -F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time); -F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time); -F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); -F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); -F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); -F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); -F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); -#ifdef CONFIG_F2FS_FAULT_INJECTION -F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); -F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); -#endif -F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); - -#define ATTR_LIST(name) (&f2fs_attr_##name.attr) -static struct attribute *f2fs_attrs[] = { - ATTR_LIST(gc_min_sleep_time), - ATTR_LIST(gc_max_sleep_time), - ATTR_LIST(gc_no_gc_sleep_time), - ATTR_LIST(gc_idle), - ATTR_LIST(reclaim_segments), - ATTR_LIST(max_small_discards), - ATTR_LIST(batched_trim_sections), - ATTR_LIST(ipu_policy), - ATTR_LIST(min_ipu_util), - ATTR_LIST(min_fsync_blocks), - ATTR_LIST(max_victim_search), - ATTR_LIST(dir_level), - ATTR_LIST(ram_thresh), - ATTR_LIST(ra_nid_pages), - ATTR_LIST(dirty_nats_ratio), - ATTR_LIST(cp_interval), - ATTR_LIST(idle_interval), -#ifdef CONFIG_F2FS_FAULT_INJECTION - ATTR_LIST(inject_rate), - ATTR_LIST(inject_type), -#endif - ATTR_LIST(lifetime_write_kbytes), - NULL, -}; - -static const struct sysfs_ops f2fs_attr_ops = { - .show = f2fs_attr_show, - .store = f2fs_attr_store, -}; - -static struct kobj_type f2fs_ktype = { - .default_attrs = f2fs_attrs, - .sysfs_ops = &f2fs_attr_ops, - .release = f2fs_sb_release, -}; - void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) { struct va_format vaf; @@ -345,7 +182,7 @@ void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; - printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf); + printk_ratelimited("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf); va_end(args); } @@ -356,6 +193,104 @@ static void init_once(void *foo) inode_init_once(&fi->vfs_inode); } +#ifdef CONFIG_QUOTA +static const char * const quotatypes[] = INITQFNAMES; +#define QTYPE2NAME(t) (quotatypes[t]) +static int f2fs_set_qf_name(struct super_block *sb, int qtype, + substring_t *args) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + char *qname; + int ret = -EINVAL; + + if (sb_any_quota_loaded(sb) && !sbi->s_qf_names[qtype]) { + f2fs_msg(sb, KERN_ERR, + "Cannot change journaled " + "quota options when quota turned on"); + return -EINVAL; + } + qname = match_strdup(args); + if (!qname) { + f2fs_msg(sb, KERN_ERR, + "Not enough memory for storing quotafile name"); + return -EINVAL; + } + if (sbi->s_qf_names[qtype]) { + if (strcmp(sbi->s_qf_names[qtype], qname) == 0) + ret = 0; + else + f2fs_msg(sb, KERN_ERR, + "%s quota file already specified", + QTYPE2NAME(qtype)); + goto errout; + } + if (strchr(qname, '/')) { + f2fs_msg(sb, KERN_ERR, + "quotafile must be on filesystem root"); + goto errout; + } + sbi->s_qf_names[qtype] = qname; + set_opt(sbi, QUOTA); + return 0; +errout: + kfree(qname); + return ret; +} + +static int f2fs_clear_qf_name(struct super_block *sb, int qtype) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + if (sb_any_quota_loaded(sb) && sbi->s_qf_names[qtype]) { + f2fs_msg(sb, KERN_ERR, "Cannot change journaled quota options" + " when quota turned on"); + return -EINVAL; + } + kfree(sbi->s_qf_names[qtype]); + sbi->s_qf_names[qtype] = NULL; + return 0; +} + +static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) +{ + /* + * We do the test below only for project quotas. 'usrquota' and + * 'grpquota' mount options are allowed even without quota feature + * to support legacy quotas in quota files. + */ + if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi->sb)) { + f2fs_msg(sbi->sb, KERN_ERR, "Project quota feature not enabled. " + "Cannot enable project quota enforcement."); + return -1; + } + if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA] || + sbi->s_qf_names[PRJQUOTA]) { + if (test_opt(sbi, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) + clear_opt(sbi, USRQUOTA); + + if (test_opt(sbi, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) + clear_opt(sbi, GRPQUOTA); + + if (test_opt(sbi, PRJQUOTA) && sbi->s_qf_names[PRJQUOTA]) + clear_opt(sbi, PRJQUOTA); + + if (test_opt(sbi, GRPQUOTA) || test_opt(sbi, USRQUOTA) || + test_opt(sbi, PRJQUOTA)) { + f2fs_msg(sbi->sb, KERN_ERR, "old and new quota " + "format mixing"); + return -1; + } + + if (!sbi->s_jquota_fmt) { + f2fs_msg(sbi->sb, KERN_ERR, "journaled quota format " + "not specified"); + return -1; + } + } + return 0; +} +#endif + static int parse_options(struct super_block *sb, char *options) { struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -363,6 +298,9 @@ static int parse_options(struct super_block *sb, char *options) substring_t args[MAX_OPT_ARGS]; char *p, *name; int arg = 0; +#ifdef CONFIG_QUOTA + int ret; +#endif if (!options) return 0; @@ -412,17 +350,26 @@ static int parse_options(struct super_block *sb, char *options) q = bdev_get_queue(sb->s_bdev); if (blk_queue_discard(q)) { set_opt(sbi, DISCARD); - } else { + } else if (!f2fs_sb_mounted_blkzoned(sb)) { f2fs_msg(sb, KERN_WARNING, "mounting with \"discard\" option, but " "the device does not support discard"); } break; case Opt_nodiscard: + if (f2fs_sb_mounted_blkzoned(sb)) { + f2fs_msg(sb, KERN_WARNING, + "discard is required for zoned block devices"); + return -EINVAL; + } clear_opt(sbi, DISCARD); + break; case Opt_noheap: set_opt(sbi, NOHEAP); break; + case Opt_heap: + clear_opt(sbi, NOHEAP); + break; #ifdef CONFIG_F2FS_FS_XATTR case Opt_user_xattr: set_opt(sbi, XATTR_USER); @@ -433,6 +380,9 @@ static int parse_options(struct super_block *sb, char *options) case Opt_inline_xattr: set_opt(sbi, INLINE_XATTR); break; + case Opt_noinline_xattr: + clear_opt(sbi, INLINE_XATTR); + break; #else case Opt_user_xattr: f2fs_msg(sb, KERN_INFO, @@ -446,6 +396,10 @@ static int parse_options(struct super_block *sb, char *options) f2fs_msg(sb, KERN_INFO, "inline_xattr options not supported"); break; + case Opt_noinline_xattr: + f2fs_msg(sb, KERN_INFO, + "noinline_xattr options not supported"); + break; #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL case Opt_acl: @@ -512,6 +466,13 @@ static int parse_options(struct super_block *sb, char *options) return -ENOMEM; if (strlen(name) == 8 && !strncmp(name, "adaptive", 8)) { + if (f2fs_sb_mounted_blkzoned(sb)) { + f2fs_msg(sb, KERN_WARNING, + "adaptive mode is not allowed with " + "zoned block device feature"); + kfree(name); + return -EINVAL; + } set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE); } else if (strlen(name) == 3 && !strncmp(name, "lfs", 3)) { @@ -522,11 +483,23 @@ static int parse_options(struct super_block *sb, char *options) } kfree(name); break; + case Opt_io_size_bits: + if (args->from && match_int(args, &arg)) + return -EINVAL; + if (arg > __ilog2_u32(BIO_MAX_PAGES)) { + f2fs_msg(sb, KERN_WARNING, + "Not support %d, larger than %d", + 1 << arg, BIO_MAX_PAGES); + return -EINVAL; + } + sbi->write_io_size_bits = arg; + break; case Opt_fault_injection: if (args->from && match_int(args, &arg)) return -EINVAL; #ifdef CONFIG_F2FS_FAULT_INJECTION f2fs_build_fault_attr(sbi, arg); + set_opt(sbi, FAULT_INJECTION); #else f2fs_msg(sb, KERN_INFO, "FAULT_INJECTION was not selected"); @@ -538,6 +511,81 @@ static int parse_options(struct super_block *sb, char *options) case Opt_nolazytime: sb->s_flags &= ~MS_LAZYTIME; break; +#ifdef CONFIG_QUOTA + case Opt_quota: + case Opt_usrquota: + set_opt(sbi, USRQUOTA); + break; + case Opt_grpquota: + set_opt(sbi, GRPQUOTA); + break; + case Opt_prjquota: + set_opt(sbi, PRJQUOTA); + break; + case Opt_usrjquota: + ret = f2fs_set_qf_name(sb, USRQUOTA, &args[0]); + if (ret) + return ret; + break; + case Opt_grpjquota: + ret = f2fs_set_qf_name(sb, GRPQUOTA, &args[0]); + if (ret) + return ret; + break; + case Opt_prjjquota: + ret = f2fs_set_qf_name(sb, PRJQUOTA, &args[0]); + if (ret) + return ret; + break; + case Opt_offusrjquota: + ret = f2fs_clear_qf_name(sb, USRQUOTA); + if (ret) + return ret; + break; + case Opt_offgrpjquota: + ret = f2fs_clear_qf_name(sb, GRPQUOTA); + if (ret) + return ret; + break; + case Opt_offprjjquota: + ret = f2fs_clear_qf_name(sb, PRJQUOTA); + if (ret) + return ret; + break; + case Opt_jqfmt_vfsold: + sbi->s_jquota_fmt = QFMT_VFS_OLD; + break; + case Opt_jqfmt_vfsv0: + sbi->s_jquota_fmt = QFMT_VFS_V0; + break; + case Opt_jqfmt_vfsv1: + sbi->s_jquota_fmt = QFMT_VFS_V1; + break; + case Opt_noquota: + clear_opt(sbi, QUOTA); + clear_opt(sbi, USRQUOTA); + clear_opt(sbi, GRPQUOTA); + clear_opt(sbi, PRJQUOTA); + break; +#else + case Opt_quota: + case Opt_usrquota: + case Opt_grpquota: + case Opt_prjquota: + case Opt_usrjquota: + case Opt_grpjquota: + case Opt_prjjquota: + case Opt_offusrjquota: + case Opt_offgrpjquota: + case Opt_offprjjquota: + case Opt_jqfmt_vfsold: + case Opt_jqfmt_vfsv0: + case Opt_jqfmt_vfsv1: + case Opt_noquota: + f2fs_msg(sb, KERN_INFO, + "quota operations not supported"); + break; +#endif default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -545,6 +593,17 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; } } +#ifdef CONFIG_QUOTA + if (f2fs_check_quota_options(sbi)) + return -EINVAL; +#endif + + if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) { + f2fs_msg(sb, KERN_ERR, + "Should set mode=lfs with %uKB-sized IO", + F2FS_IO_SIZE_KB(sbi)); + return -EINVAL; + } return 0; } @@ -570,14 +629,22 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) mutex_init(&fi->inmem_lock); init_rwsem(&fi->dio_rwsem[READ]); init_rwsem(&fi->dio_rwsem[WRITE]); + init_rwsem(&fi->i_mmap_sem); + init_rwsem(&fi->i_xattr_sem); +#ifdef CONFIG_QUOTA + memset(&fi->i_dquot, 0, sizeof(fi->i_dquot)); + fi->i_reserved_quota = 0; +#endif /* Will be used by directory only */ fi->i_dir_level = F2FS_SB(sb)->dir_level; + return &fi->vfs_inode; } static int f2fs_drop_inode(struct inode *inode) { + int ret; /* * This is to avoid a deadlock condition like below. * writeback_single_inode(inode) @@ -610,30 +677,33 @@ static int f2fs_drop_inode(struct inode *inode) spin_lock(&inode->i_lock); atomic_dec(&inode->i_count); } + trace_f2fs_drop_inode(inode, 0); return 0; } - - return generic_drop_inode(inode); + ret = generic_drop_inode(inode); + trace_f2fs_drop_inode(inode, ret); + return ret; } -int f2fs_inode_dirtied(struct inode *inode) +int f2fs_inode_dirtied(struct inode *inode, bool sync) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + int ret = 0; spin_lock(&sbi->inode_lock[DIRTY_META]); if (is_inode_flag_set(inode, FI_DIRTY_INODE)) { - spin_unlock(&sbi->inode_lock[DIRTY_META]); - return 1; + ret = 1; + } else { + set_inode_flag(inode, FI_DIRTY_INODE); + stat_inc_dirty_inode(sbi, DIRTY_META); } - - set_inode_flag(inode, FI_DIRTY_INODE); - list_add_tail(&F2FS_I(inode)->gdirty_list, + if (sync && list_empty(&F2FS_I(inode)->gdirty_list)) { + list_add_tail(&F2FS_I(inode)->gdirty_list, &sbi->inode_list[DIRTY_META]); - inc_page_count(sbi, F2FS_DIRTY_IMETA); - stat_inc_dirty_inode(sbi, DIRTY_META); + inc_page_count(sbi, F2FS_DIRTY_IMETA); + } spin_unlock(&sbi->inode_lock[DIRTY_META]); - - return 0; + return ret; } void f2fs_inode_synced(struct inode *inode) @@ -645,10 +715,12 @@ void f2fs_inode_synced(struct inode *inode) spin_unlock(&sbi->inode_lock[DIRTY_META]); return; } - list_del_init(&F2FS_I(inode)->gdirty_list); + if (!list_empty(&F2FS_I(inode)->gdirty_list)) { + list_del_init(&F2FS_I(inode)->gdirty_list); + dec_page_count(sbi, F2FS_DIRTY_IMETA); + } clear_inode_flag(inode, FI_DIRTY_INODE); clear_inode_flag(inode, FI_AUTO_RECOVER); - dec_page_count(sbi, F2FS_DIRTY_IMETA); stat_dec_dirty_inode(F2FS_I_SB(inode), DIRTY_META); spin_unlock(&sbi->inode_lock[DIRTY_META]); } @@ -672,7 +744,7 @@ static void f2fs_dirty_inode(struct inode *inode, int flags) if (is_inode_flag_set(inode, FI_AUTO_RECOVER)) clear_inode_flag(inode, FI_AUTO_RECOVER); - f2fs_inode_dirtied(inode); + f2fs_inode_dirtied(inode, false); } static void f2fs_i_callback(struct rcu_head *head) @@ -692,18 +764,25 @@ static void destroy_percpu_info(struct f2fs_sb_info *sbi) percpu_counter_destroy(&sbi->total_valid_inode_count); } +static void destroy_device_list(struct f2fs_sb_info *sbi) +{ + int i; + + for (i = 0; i < sbi->s_ndevs; i++) { + blkdev_put(FDEV(i).bdev, FMODE_EXCL); +#ifdef CONFIG_BLK_DEV_ZONED + kfree(FDEV(i).blkz_type); +#endif + } + kfree(sbi->devs); +} + static void f2fs_put_super(struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); + int i; - if (sbi->s_proc) { - remove_proc_entry("segment_info", sbi->s_proc); - remove_proc_entry("segment_bits", sbi->s_proc); - remove_proc_entry(sb->s_id, f2fs_proc_root); - } - kobject_del(&sbi->s_kobj); - - stop_gc_thread(sbi); + f2fs_quota_off_umount(sb); /* prevent remaining shrinker jobs */ mutex_lock(&sbi->umount_mutex); @@ -721,6 +800,16 @@ static void f2fs_put_super(struct super_block *sb) write_checkpoint(sbi, &cpc); } + /* be sure to wait for any on-going discard commands */ + f2fs_wait_discard_bios(sbi, true); + + if (f2fs_discard_en(sbi) && !sbi->discard_blks) { + struct cp_control cpc = { + .reason = CP_UMOUNT | CP_TRIMMED, + }; + write_checkpoint(sbi, &cpc); + } + /* write_checkpoint can update stat informaion */ f2fs_destroy_stats(sbi); @@ -729,13 +818,12 @@ static void f2fs_put_super(struct super_block *sb) * In addition, EIO will skip do checkpoint, we need this as well. */ release_ino_entry(sbi, true); - release_discard_addrs(sbi); f2fs_leave_shrinker(sbi); mutex_unlock(&sbi->umount_mutex); /* our cp_error case, we can wait for any writeback page */ - f2fs_flush_merged_bios(sbi); + f2fs_flush_merged_writes(sbi); iput(sbi->node_inode); iput(sbi->meta_inode); @@ -745,15 +833,23 @@ static void f2fs_put_super(struct super_block *sb) destroy_segment_manager(sbi); kfree(sbi->ckpt); - kobject_put(&sbi->s_kobj); - wait_for_completion(&sbi->s_kobj_unregister); + + f2fs_unregister_sysfs(sbi); sb->s_fs_info = NULL; if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); kfree(sbi->raw_super); + destroy_device_list(sbi); + mempool_destroy(sbi->write_io_dummy); +#ifdef CONFIG_QUOTA + for (i = 0; i < MAXQUOTAS; i++) + kfree(sbi->s_qf_names[i]); +#endif destroy_percpu_info(sbi); + for (i = 0; i < NR_PAGE_TYPE; i++) + kfree(sbi->write_io[i]); kfree(sbi); } @@ -764,6 +860,9 @@ int f2fs_sync_fs(struct super_block *sb, int sync) trace_f2fs_sync_fs(sb, sync); + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + return -EAGAIN; + if (sync) { struct cp_control cpc; @@ -780,13 +879,17 @@ int f2fs_sync_fs(struct super_block *sb, int sync) static int f2fs_freeze(struct super_block *sb) { - int err; - if (f2fs_readonly(sb)) return 0; - err = f2fs_sync_fs(sb, 1); - return err; + /* IO error happened before */ + if (unlikely(f2fs_cp_error(F2FS_SB(sb)))) + return -EIO; + + /* must be clean, since sync_filesystem() was already called */ + if (is_sbi_flag_set(F2FS_SB(sb), SBI_IS_DIRTY)) + return -EINVAL; + return 0; } static int f2fs_unfreeze(struct super_block *sb) @@ -794,12 +897,55 @@ static int f2fs_unfreeze(struct super_block *sb) return 0; } +#ifdef CONFIG_QUOTA +static int f2fs_statfs_project(struct super_block *sb, + kprojid_t projid, struct kstatfs *buf) +{ + struct kqid qid; + struct dquot *dquot; + u64 limit; + u64 curblock; + + qid = make_kqid_projid(projid); + dquot = dqget(sb, qid); + if (IS_ERR(dquot)) + return PTR_ERR(dquot); + spin_lock(&dq_data_lock); + + limit = (dquot->dq_dqb.dqb_bsoftlimit ? + dquot->dq_dqb.dqb_bsoftlimit : + dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits; + if (limit && buf->f_blocks > limit) { + curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits; + buf->f_blocks = limit; + buf->f_bfree = buf->f_bavail = + (buf->f_blocks > curblock) ? + (buf->f_blocks - curblock) : 0; + } + + limit = dquot->dq_dqb.dqb_isoftlimit ? + dquot->dq_dqb.dqb_isoftlimit : + dquot->dq_dqb.dqb_ihardlimit; + if (limit && buf->f_files > limit) { + buf->f_files = limit; + buf->f_ffree = + (buf->f_files > dquot->dq_dqb.dqb_curinodes) ? + (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0; + } + + spin_unlock(&dq_data_lock); + dqput(dquot); + return 0; +} +#endif + static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct f2fs_sb_info *sbi = F2FS_SB(sb); u64 id = huge_encode_dev(sb->s_bdev->bd_dev); block_t total_count, user_block_count, start_count, ovp_count; + u64 avail_node_count; total_count = le64_to_cpu(sbi->raw_super->block_count); user_block_count = sbi->user_block_count; @@ -810,18 +956,67 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_blocks = total_count - start_count; buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count; - buf->f_bavail = user_block_count - valid_user_blocks(sbi); + buf->f_bavail = user_block_count - valid_user_blocks(sbi) - + sbi->reserved_blocks; - buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; - buf->f_ffree = buf->f_files - valid_inode_count(sbi); + avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; + + if (avail_node_count > user_block_count) { + buf->f_files = user_block_count; + buf->f_ffree = buf->f_bavail; + } else { + buf->f_files = avail_node_count; + buf->f_ffree = min(avail_node_count - valid_node_count(sbi), + buf->f_bavail); + } buf->f_namelen = F2FS_NAME_LEN; buf->f_fsid.val[0] = (u32)id; buf->f_fsid.val[1] = (u32)(id >> 32); +#ifdef CONFIG_QUOTA + if (is_inode_flag_set(dentry->d_inode, FI_PROJ_INHERIT) && + sb_has_quota_limits_enabled(sb, PRJQUOTA)) { + f2fs_statfs_project(sb, F2FS_I(dentry->d_inode)->i_projid, buf); + } +#endif return 0; } +static inline void f2fs_show_quota_options(struct seq_file *seq, + struct super_block *sb) +{ +#ifdef CONFIG_QUOTA + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + if (sbi->s_jquota_fmt) { + char *fmtname = ""; + + switch (sbi->s_jquota_fmt) { + case QFMT_VFS_OLD: + fmtname = "vfsold"; + break; + case QFMT_VFS_V0: + fmtname = "vfsv0"; + break; + case QFMT_VFS_V1: + fmtname = "vfsv1"; + break; + } + seq_printf(seq, ",jqfmt=%s", fmtname); + } + + if (sbi->s_qf_names[USRQUOTA]) + seq_show_option(seq, "usrjquota", sbi->s_qf_names[USRQUOTA]); + + if (sbi->s_qf_names[GRPQUOTA]) + seq_show_option(seq, "grpjquota", sbi->s_qf_names[GRPQUOTA]); + + if (sbi->s_qf_names[PRJQUOTA]) + seq_show_option(seq, "prjjquota", sbi->s_qf_names[PRJQUOTA]); +#endif +} + static int f2fs_show_options(struct seq_file *seq, struct dentry *root) { struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb); @@ -839,7 +1034,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) if (test_opt(sbi, DISCARD)) seq_puts(seq, ",discard"); if (test_opt(sbi, NOHEAP)) - seq_puts(seq, ",no_heap_alloc"); + seq_puts(seq, ",no_heap"); + else + seq_puts(seq, ",heap"); #ifdef CONFIG_F2FS_FS_XATTR if (test_opt(sbi, XATTR_USER)) seq_puts(seq, ",user_xattr"); @@ -847,6 +1044,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",nouser_xattr"); if (test_opt(sbi, INLINE_XATTR)) seq_puts(seq, ",inline_xattr"); + else + seq_puts(seq, ",noinline_xattr"); #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL if (test_opt(sbi, POSIX_ACL)) @@ -883,89 +1082,42 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) else if (test_opt(sbi, LFS)) seq_puts(seq, "lfs"); seq_printf(seq, ",active_logs=%u", sbi->active_logs); + if (F2FS_IO_SIZE_BITS(sbi)) + seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi)); +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (test_opt(sbi, FAULT_INJECTION)) + seq_printf(seq, ",fault_injection=%u", + sbi->fault_info.inject_rate); +#endif +#ifdef CONFIG_QUOTA + if (test_opt(sbi, QUOTA)) + seq_puts(seq, ",quota"); + if (test_opt(sbi, USRQUOTA)) + seq_puts(seq, ",usrquota"); + if (test_opt(sbi, GRPQUOTA)) + seq_puts(seq, ",grpquota"); + if (test_opt(sbi, PRJQUOTA)) + seq_puts(seq, ",prjquota"); +#endif + f2fs_show_quota_options(seq, sbi->sb); return 0; } -static int segment_info_seq_show(struct seq_file *seq, void *offset) -{ - struct super_block *sb = seq->private; - struct f2fs_sb_info *sbi = F2FS_SB(sb); - unsigned int total_segs = - le32_to_cpu(sbi->raw_super->segment_count_main); - int i; - - seq_puts(seq, "format: segment_type|valid_blocks\n" - "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n"); - - for (i = 0; i < total_segs; i++) { - struct seg_entry *se = get_seg_entry(sbi, i); - - if ((i % 10) == 0) - seq_printf(seq, "%-10d", i); - seq_printf(seq, "%d|%-3u", se->type, - get_valid_blocks(sbi, i, 1)); - if ((i % 10) == 9 || i == (total_segs - 1)) - seq_putc(seq, '\n'); - else - seq_putc(seq, ' '); - } - - return 0; -} - -static int segment_bits_seq_show(struct seq_file *seq, void *offset) -{ - struct super_block *sb = seq->private; - struct f2fs_sb_info *sbi = F2FS_SB(sb); - unsigned int total_segs = - le32_to_cpu(sbi->raw_super->segment_count_main); - int i, j; - - seq_puts(seq, "format: segment_type|valid_blocks|bitmaps\n" - "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n"); - - for (i = 0; i < total_segs; i++) { - struct seg_entry *se = get_seg_entry(sbi, i); - - seq_printf(seq, "%-10d", i); - seq_printf(seq, "%d|%-3u|", se->type, - get_valid_blocks(sbi, i, 1)); - for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++) - seq_printf(seq, " %.2x", se->cur_valid_map[j]); - seq_putc(seq, '\n'); - } - return 0; -} - -#define F2FS_PROC_FILE_DEF(_name) \ -static int _name##_open_fs(struct inode *inode, struct file *file) \ -{ \ - return single_open(file, _name##_seq_show, PDE_DATA(inode)); \ -} \ - \ -static const struct file_operations f2fs_seq_##_name##_fops = { \ - .open = _name##_open_fs, \ - .read = seq_read, \ - .llseek = seq_lseek, \ - .release = single_release, \ -}; - -F2FS_PROC_FILE_DEF(segment_info); -F2FS_PROC_FILE_DEF(segment_bits); - static void default_options(struct f2fs_sb_info *sbi) { /* init some FS parameters */ sbi->active_logs = NR_CURSEG_TYPE; set_opt(sbi, BG_GC); + set_opt(sbi, INLINE_XATTR); set_opt(sbi, INLINE_DATA); set_opt(sbi, INLINE_DENTRY); set_opt(sbi, EXTENT_CACHE); + set_opt(sbi, NOHEAP); sbi->sb->s_flags |= MS_LAZYTIME; set_opt(sbi, FLUSH_MERGE); - if (f2fs_sb_mounted_hmsmr(sbi->sb)) { + if (f2fs_sb_mounted_blkzoned(sbi->sb)) { set_opt_mode(sbi, F2FS_MOUNT_LFS); set_opt(sbi, DISCARD); } else { @@ -988,6 +1140,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) { struct f2fs_sb_info *sbi = F2FS_SB(sb); struct f2fs_mount_info org_mount_opt; + unsigned long old_sb_flags; int err, active_logs; bool need_restart_gc = false; bool need_stop_gc = false; @@ -995,14 +1148,37 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) #ifdef CONFIG_F2FS_FAULT_INJECTION struct f2fs_fault_info ffi = sbi->fault_info; #endif +#ifdef CONFIG_QUOTA + int s_jquota_fmt; + char *s_qf_names[MAXQUOTAS]; + int i, j; +#endif /* * Save the old mount options in case we * need to restore them. */ org_mount_opt = sbi->mount_opt; + old_sb_flags = sb->s_flags; active_logs = sbi->active_logs; +#ifdef CONFIG_QUOTA + s_jquota_fmt = sbi->s_jquota_fmt; + for (i = 0; i < MAXQUOTAS; i++) { + if (sbi->s_qf_names[i]) { + s_qf_names[i] = kstrdup(sbi->s_qf_names[i], + GFP_KERNEL); + if (!s_qf_names[i]) { + for (j = 0; j < i; j++) + kfree(s_qf_names[j]); + return -ENOMEM; + } + } else { + s_qf_names[i] = NULL; + } + } +#endif + /* recover superblocks we couldn't write due to previous RO mount */ if (!(*flags & MS_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) { err = f2fs_commit_super(sbi, false); @@ -1012,7 +1188,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) clear_sbi_flag(sbi, SBI_NEED_SB_WRITE); } - sbi->mount_opt.opt = 0; default_options(sbi); /* parse mount options */ @@ -1027,6 +1202,16 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) if (f2fs_readonly(sb) && (*flags & MS_RDONLY)) goto skip; + if (!f2fs_readonly(sb) && (*flags & MS_RDONLY)) { + err = dquot_suspend(sb, -1); + if (err < 0) + goto restore_opts; + } else { + /* dquot_resume needs RW */ + sb->s_flags &= ~MS_RDONLY; + dquot_resume(sb, -1); + } + /* disallow enable/disable extent_cache dynamically */ if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) { err = -EINVAL; @@ -1067,13 +1252,19 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * or if flush_merge is not passed in mount option. */ if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { - destroy_flush_cmd_control(sbi); - } else if (!SM_I(sbi)->cmd_control_info) { + clear_opt(sbi, FLUSH_MERGE); + destroy_flush_cmd_control(sbi, false); + } else { err = create_flush_cmd_control(sbi); if (err) goto restore_gc; } skip: +#ifdef CONFIG_QUOTA + /* Release old quota file names */ + for (i = 0; i < MAXQUOTAS; i++) + kfree(s_qf_names[i]); +#endif /* Update the POSIXACL Flag */ sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); @@ -1088,21 +1279,286 @@ restore_gc: stop_gc_thread(sbi); } restore_opts: +#ifdef CONFIG_QUOTA + sbi->s_jquota_fmt = s_jquota_fmt; + for (i = 0; i < MAXQUOTAS; i++) { + kfree(sbi->s_qf_names[i]); + sbi->s_qf_names[i] = s_qf_names[i]; + } +#endif sbi->mount_opt = org_mount_opt; sbi->active_logs = active_logs; + sb->s_flags = old_sb_flags; #ifdef CONFIG_F2FS_FAULT_INJECTION sbi->fault_info = ffi; #endif return err; } -static struct super_operations f2fs_sops = { +#ifdef CONFIG_QUOTA +/* Read data from quotafile */ +static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data, + size_t len, loff_t off) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + struct address_space *mapping = inode->i_mapping; + block_t blkidx = F2FS_BYTES_TO_BLK(off); + int offset = off & (sb->s_blocksize - 1); + int tocopy; + size_t toread; + loff_t i_size = i_size_read(inode); + struct page *page; + char *kaddr; + + if (off > i_size) + return 0; + + if (off + len > i_size) + len = i_size - off; + toread = len; + while (toread > 0) { + tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread); +repeat: + page = read_mapping_page(mapping, blkidx, NULL); + if (IS_ERR(page)) + return PTR_ERR(page); + + lock_page(page); + + if (unlikely(page->mapping != mapping)) { + f2fs_put_page(page, 1); + goto repeat; + } + if (unlikely(!PageUptodate(page))) { + f2fs_put_page(page, 1); + return -EIO; + } + + kaddr = kmap_atomic(page); + memcpy(data, kaddr + offset, tocopy); + kunmap_atomic(kaddr); + f2fs_put_page(page, 1); + + offset = 0; + toread -= tocopy; + data += tocopy; + blkidx++; + } + return len; +} + +/* Write to quotafile */ +static ssize_t f2fs_quota_write(struct super_block *sb, int type, + const char *data, size_t len, loff_t off) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + struct address_space *mapping = inode->i_mapping; + const struct address_space_operations *a_ops = mapping->a_ops; + int offset = off & (sb->s_blocksize - 1); + size_t towrite = len; + struct page *page; + char *kaddr; + int err = 0; + int tocopy; + + while (towrite > 0) { + tocopy = min_t(unsigned long, sb->s_blocksize - offset, + towrite); + + err = a_ops->write_begin(NULL, mapping, off, tocopy, 0, + &page, NULL); + if (unlikely(err)) + break; + + kaddr = kmap_atomic(page); + memcpy(kaddr + offset, data, tocopy); + kunmap_atomic(kaddr); + flush_dcache_page(page); + + a_ops->write_end(NULL, mapping, off, tocopy, tocopy, + page, NULL); + offset = 0; + towrite -= tocopy; + off += tocopy; + data += tocopy; + cond_resched(); + } + + if (len == towrite) + return 0; + inode->i_version++; + inode->i_mtime = inode->i_ctime = current_time(inode); + f2fs_mark_inode_dirty_sync(inode, false); + return len - towrite; +} + +static struct dquot **f2fs_get_dquots(struct inode *inode) +{ + return F2FS_I(inode)->i_dquot; +} + +static qsize_t *f2fs_get_reserved_space(struct inode *inode) +{ + return &F2FS_I(inode)->i_reserved_quota; +} + +static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type) +{ + return dquot_quota_on_mount(sbi->sb, sbi->s_qf_names[type], + sbi->s_jquota_fmt, type); +} + +void f2fs_enable_quota_files(struct f2fs_sb_info *sbi) +{ + int i, ret; + + for (i = 0; i < MAXQUOTAS; i++) { + if (sbi->s_qf_names[i]) { + ret = f2fs_quota_on_mount(sbi, i); + if (ret < 0) + f2fs_msg(sbi->sb, KERN_ERR, + "Cannot turn on journaled " + "quota: error %d", ret); + } + } +} + +static int f2fs_quota_sync(struct super_block *sb, int type) +{ + struct quota_info *dqopt = sb_dqopt(sb); + int cnt; + int ret; + + ret = dquot_writeback_dquots(sb, type); + if (ret) + return ret; + + /* + * Now when everything is written we can discard the pagecache so + * that userspace sees the changes. + */ + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + if (type != -1 && cnt != type) + continue; + if (!sb_has_quota_active(sb, cnt)) + continue; + + ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping); + if (ret) + return ret; + + inode_lock(dqopt->files[cnt]); + truncate_inode_pages(&dqopt->files[cnt]->i_data, 0); + inode_unlock(dqopt->files[cnt]); + } + return 0; +} + +static int f2fs_quota_on(struct super_block *sb, int type, int format_id, + struct path *path) +{ + struct inode *inode; + int err; + + err = f2fs_quota_sync(sb, type); + if (err) + return err; + + err = dquot_quota_on(sb, type, format_id, path); + if (err) + return err; + + inode = d_inode(path->dentry); + + inode_lock(inode); + F2FS_I(inode)->i_flags |= FS_NOATIME_FL | FS_IMMUTABLE_FL; + inode_set_flags(inode, S_NOATIME | S_IMMUTABLE, + S_NOATIME | S_IMMUTABLE); + inode_unlock(inode); + f2fs_mark_inode_dirty_sync(inode, false); + + return 0; +} + +static int f2fs_quota_off(struct super_block *sb, int type) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + int err; + + if (!inode || !igrab(inode)) + return dquot_quota_off(sb, type); + + f2fs_quota_sync(sb, type); + + err = dquot_quota_off(sb, type); + if (err) + goto out_put; + + inode_lock(inode); + F2FS_I(inode)->i_flags &= ~(FS_NOATIME_FL | FS_IMMUTABLE_FL); + inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE); + inode_unlock(inode); + f2fs_mark_inode_dirty_sync(inode, false); +out_put: + iput(inode); + return err; +} + +void f2fs_quota_off_umount(struct super_block *sb) +{ + int type; + + for (type = 0; type < MAXQUOTAS; type++) + f2fs_quota_off(sb, type); +} + +int f2fs_get_projid(struct inode *inode, kprojid_t *projid) +{ + *projid = F2FS_I(inode)->i_projid; + return 0; +} + +static const struct dquot_operations f2fs_quota_operations = { + .get_reserved_space = f2fs_get_reserved_space, + .write_dquot = dquot_commit, + .acquire_dquot = dquot_acquire, + .release_dquot = dquot_release, + .mark_dirty = dquot_mark_dquot_dirty, + .write_info = dquot_commit_info, + .alloc_dquot = dquot_alloc, + .destroy_dquot = dquot_destroy, + .get_projid = f2fs_get_projid, + .get_next_id = dquot_get_next_id, +}; + +static const struct quotactl_ops f2fs_quotactl_ops = { + .quota_on = f2fs_quota_on, + .quota_off = f2fs_quota_off, + .quota_sync = f2fs_quota_sync, + .get_state = dquot_get_state, + .set_info = dquot_set_dqinfo, + .get_dqblk = dquot_get_dqblk, + .set_dqblk = dquot_set_dqblk, + .get_nextdqblk = dquot_get_next_dqblk, +}; +#else +void f2fs_quota_off_umount(struct super_block *sb) +{ +} +#endif + +static const struct super_operations f2fs_sops = { .alloc_inode = f2fs_alloc_inode, .drop_inode = f2fs_drop_inode, .destroy_inode = f2fs_destroy_inode, .write_inode = f2fs_write_inode, .dirty_inode = f2fs_dirty_inode, .show_options = f2fs_show_options, +#ifdef CONFIG_QUOTA + .quota_read = f2fs_quota_read, + .quota_write = f2fs_quota_write, + .get_dquots = f2fs_get_dquots, +#endif .evict_inode = f2fs_evict_inode, .put_super = f2fs_put_super, .sync_fs = f2fs_sync_fs, @@ -1120,12 +1576,6 @@ static int f2fs_get_context(struct inode *inode, void *ctx, size_t len) ctx, len, NULL); } -static int f2fs_key_prefix(struct inode *inode, u8 **key) -{ - *key = F2FS_I_SB(inode)->key_prefix; - return F2FS_I_SB(inode)->key_prefix_size; -} - static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len, void *fs_data) { @@ -1140,16 +1590,16 @@ static unsigned f2fs_max_namelen(struct inode *inode) inode->i_sb->s_blocksize : F2FS_NAME_LEN; } -static struct fscrypt_operations f2fs_cryptops = { +static const struct fscrypt_operations f2fs_cryptops = { + .key_prefix = "f2fs:", .get_context = f2fs_get_context, - .key_prefix = f2fs_key_prefix, .set_context = f2fs_set_context, .is_encrypted = f2fs_encrypted_inode, .empty_dir = f2fs_empty_dir, .max_namelen = f2fs_max_namelen, }; #else -static struct fscrypt_operations f2fs_cryptops = { +static const struct fscrypt_operations f2fs_cryptops = { .is_encrypted = f2fs_encrypted_inode, }; #endif @@ -1201,9 +1651,16 @@ static const struct export_operations f2fs_export_ops = { static loff_t max_file_blocks(void) { - loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS); + loff_t result = 0; loff_t leaf_count = ADDRS_PER_BLOCK; + /* + * note: previously, result is equal to (DEF_ADDRS_PER_INODE - + * F2FS_INLINE_XATTR_ADDRS), but now f2fs try to reserve more + * space in inode.i_addr, it will be more safe to reassign + * result as zero. + */ + /* two direct node blocks */ result += (leaf_count * 2); @@ -1229,7 +1686,7 @@ static int __f2fs_commit_super(struct buffer_head *bh, unlock_buffer(bh); /* it's rare case, we can do fua all the time */ - return __sync_dirty_buffer(bh, WRITE_FLUSH_FUA); + return __sync_dirty_buffer(bh, REQ_SYNC | REQ_PREFLUSH | REQ_FUA); } static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, @@ -1424,6 +1881,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) unsigned int total, fsmeta; struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + unsigned int ovp_segments, reserved_segments; unsigned int main_segs, blocks_per_seg; int i; @@ -1437,20 +1895,28 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) if (unlikely(fsmeta >= total)) return 1; - main_segs = le32_to_cpu(sbi->raw_super->segment_count_main); + ovp_segments = le32_to_cpu(ckpt->overprov_segment_count); + reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count); + + if (unlikely(fsmeta < F2FS_MIN_SEGMENTS || + ovp_segments == 0 || reserved_segments == 0)) { + f2fs_msg(sbi->sb, KERN_ERR, + "Wrong layout: check mkfs.f2fs version"); + return 1; + } + + main_segs = le32_to_cpu(raw_super->segment_count_main); blocks_per_seg = sbi->blocks_per_seg; for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs || - le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg) { + le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg) return 1; - } } for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) { if (le32_to_cpu(ckpt->cur_data_segno[i]) >= main_segs || - le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg) { + le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg) return 1; - } } if (unlikely(f2fs_cp_error(sbi))) { @@ -1463,7 +1929,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) static void init_sb_info(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = sbi->raw_super; - int i; + int i, j; sbi->log_sectors_per_block = le32_to_cpu(raw_super->log_sectors_per_block); @@ -1491,17 +1957,14 @@ static void init_sb_info(struct f2fs_sb_info *sbi) for (i = 0; i < NR_COUNT_TYPE; i++) atomic_set(&sbi->nr_pages[i], 0); + atomic_set(&sbi->wb_sync_req, 0); + INIT_LIST_HEAD(&sbi->s_list); mutex_init(&sbi->umount_mutex); - mutex_init(&sbi->wio_mutex[NODE]); - mutex_init(&sbi->wio_mutex[DATA]); + for (i = 0; i < NR_PAGE_TYPE - 1; i++) + for (j = HOT; j < NR_TEMP_TYPE; j++) + mutex_init(&sbi->wio_mutex[i][j]); spin_lock_init(&sbi->cp_lock); - -#ifdef CONFIG_F2FS_FS_ENCRYPTION - memcpy(sbi->key_prefix, F2FS_KEY_DESC_PREFIX, - F2FS_KEY_DESC_PREFIX_SIZE); - sbi->key_prefix_size = F2FS_KEY_DESC_PREFIX_SIZE; -#endif } static int init_percpu_info(struct f2fs_sb_info *sbi) @@ -1516,6 +1979,71 @@ static int init_percpu_info(struct f2fs_sb_info *sbi) GFP_KERNEL); } +#ifdef CONFIG_BLK_DEV_ZONED +static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) +{ + struct block_device *bdev = FDEV(devi).bdev; + sector_t nr_sectors = bdev->bd_part->nr_sects; + sector_t sector = 0; + struct blk_zone *zones; + unsigned int i, nr_zones; + unsigned int n = 0; + int err = -EIO; + + if (!f2fs_sb_mounted_blkzoned(sbi->sb)) + return 0; + + if (sbi->blocks_per_blkz && sbi->blocks_per_blkz != + SECTOR_TO_BLOCK(bdev_zone_sectors(bdev))) + return -EINVAL; + sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_sectors(bdev)); + if (sbi->log_blocks_per_blkz && sbi->log_blocks_per_blkz != + __ilog2_u32(sbi->blocks_per_blkz)) + return -EINVAL; + sbi->log_blocks_per_blkz = __ilog2_u32(sbi->blocks_per_blkz); + FDEV(devi).nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >> + sbi->log_blocks_per_blkz; + if (nr_sectors & (bdev_zone_sectors(bdev) - 1)) + FDEV(devi).nr_blkz++; + + FDEV(devi).blkz_type = kmalloc(FDEV(devi).nr_blkz, GFP_KERNEL); + if (!FDEV(devi).blkz_type) + return -ENOMEM; + +#define F2FS_REPORT_NR_ZONES 4096 + + zones = kcalloc(F2FS_REPORT_NR_ZONES, sizeof(struct blk_zone), + GFP_KERNEL); + if (!zones) + return -ENOMEM; + + /* Get block zones type */ + while (zones && sector < nr_sectors) { + + nr_zones = F2FS_REPORT_NR_ZONES; + err = blkdev_report_zones(bdev, sector, + zones, &nr_zones, + GFP_KERNEL); + if (err) + break; + if (!nr_zones) { + err = -EIO; + break; + } + + for (i = 0; i < nr_zones; i++) { + FDEV(devi).blkz_type[n] = zones[i].type; + sector += zones[i].len; + n++; + } + } + + kfree(zones); + + return err; +} +#endif + /* * Read f2fs raw super block. * Because we have two copies of super block, so read both of them @@ -1608,6 +2136,104 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) return err; } +static int f2fs_scan_devices(struct f2fs_sb_info *sbi) +{ + struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); + unsigned int max_devices = MAX_DEVICES; + int i; + + /* Initialize single device information */ + if (!RDEV(0).path[0]) { +#ifdef CONFIG_BLK_DEV_ZONED + if (!bdev_is_zoned(sbi->sb->s_bdev)) + return 0; + max_devices = 1; +#else + return 0; +#endif + } + + /* + * Initialize multiple devices information, or single + * zoned block device information. + */ + sbi->devs = kcalloc(max_devices, sizeof(struct f2fs_dev_info), + GFP_KERNEL); + if (!sbi->devs) + return -ENOMEM; + + for (i = 0; i < max_devices; i++) { + + if (i > 0 && !RDEV(i).path[0]) + break; + + if (max_devices == 1) { + /* Single zoned block device mount */ + FDEV(0).bdev = + blkdev_get_by_dev(sbi->sb->s_bdev->bd_dev, + sbi->sb->s_mode, sbi->sb->s_type); + } else { + /* Multi-device mount */ + memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN); + FDEV(i).total_segments = + le32_to_cpu(RDEV(i).total_segments); + if (i == 0) { + FDEV(i).start_blk = 0; + FDEV(i).end_blk = FDEV(i).start_blk + + (FDEV(i).total_segments << + sbi->log_blocks_per_seg) - 1 + + le32_to_cpu(raw_super->segment0_blkaddr); + } else { + FDEV(i).start_blk = FDEV(i - 1).end_blk + 1; + FDEV(i).end_blk = FDEV(i).start_blk + + (FDEV(i).total_segments << + sbi->log_blocks_per_seg) - 1; + } + FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path, + sbi->sb->s_mode, sbi->sb->s_type); + } + if (IS_ERR(FDEV(i).bdev)) + return PTR_ERR(FDEV(i).bdev); + + /* to release errored devices */ + sbi->s_ndevs = i + 1; + +#ifdef CONFIG_BLK_DEV_ZONED + if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM && + !f2fs_sb_mounted_blkzoned(sbi->sb)) { + f2fs_msg(sbi->sb, KERN_ERR, + "Zoned block device feature not enabled\n"); + return -EINVAL; + } + if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) { + if (init_blkz_info(sbi, i)) { + f2fs_msg(sbi->sb, KERN_ERR, + "Failed to initialize F2FS blkzone information"); + return -EINVAL; + } + if (max_devices == 1) + break; + f2fs_msg(sbi->sb, KERN_INFO, + "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)", + i, FDEV(i).path, + FDEV(i).total_segments, + FDEV(i).start_blk, FDEV(i).end_blk, + bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ? + "Host-aware" : "Host-managed"); + continue; + } +#endif + f2fs_msg(sbi->sb, KERN_INFO, + "Mount Device [%2d]: %20s, %8u, %8x - %8x", + i, FDEV(i).path, + FDEV(i).total_segments, + FDEV(i).start_blk, FDEV(i).end_blk); + } + f2fs_msg(sbi->sb, KERN_INFO, + "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi)); + return 0; +} + static int f2fs_fill_super(struct super_block *sb, void *data, int silent) { struct f2fs_sb_info *sbi; @@ -1655,6 +2281,24 @@ try_onemore: sb->s_fs_info = sbi; sbi->raw_super = raw_super; + /* precompute checksum seed for metadata */ + if (f2fs_sb_has_inode_chksum(sb)) + sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid, + sizeof(raw_super->uuid)); + + /* + * The BLKZONED feature indicates that the drive was formatted with + * zone alignment optimization. This is optional for host-aware + * devices, but mandatory for host-managed zoned block devices. + */ +#ifndef CONFIG_BLK_DEV_ZONED + if (f2fs_sb_mounted_blkzoned(sb)) { + f2fs_msg(sb, KERN_ERR, + "Zoned block device support is not enabled\n"); + err = -EOPNOTSUPP; + goto free_sb_buf; + } +#endif default_options(sbi); /* parse mount options */ options = kstrdup((const char *)data, GFP_KERNEL); @@ -1673,6 +2317,12 @@ try_onemore: sb->s_max_links = F2FS_LINK_MAX; get_random_bytes(&sbi->s_next_generation, sizeof(u32)); +#ifdef CONFIG_QUOTA + sb->dq_op = &f2fs_quota_operations; + sb->s_qcop = &f2fs_quotactl_ops; + sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; +#endif + sb->s_op = &f2fs_sops; sb->s_cop = &f2fs_cryptops; sb->s_xattr = f2fs_xattr_handlers; @@ -1681,25 +2331,41 @@ try_onemore: sb->s_time_gran = 1; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); - memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid)); + memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid)); /* init f2fs-specific super block info */ sbi->valid_super_block = valid_super_block; mutex_init(&sbi->gc_mutex); mutex_init(&sbi->cp_mutex); init_rwsem(&sbi->node_write); + init_rwsem(&sbi->node_change); /* disallow all the data/node/meta page writes */ set_sbi_flag(sbi, SBI_POR_DOING); spin_lock_init(&sbi->stat_lock); - init_rwsem(&sbi->read_io.io_rwsem); - sbi->read_io.sbi = sbi; - sbi->read_io.bio = NULL; + /* init iostat info */ + spin_lock_init(&sbi->iostat_lock); + sbi->iostat_enable = false; + for (i = 0; i < NR_PAGE_TYPE; i++) { - init_rwsem(&sbi->write_io[i].io_rwsem); - sbi->write_io[i].sbi = sbi; - sbi->write_io[i].bio = NULL; + int n = (i == META) ? 1: NR_TEMP_TYPE; + int j; + + sbi->write_io[i] = kmalloc(n * sizeof(struct f2fs_bio_info), + GFP_KERNEL); + if (!sbi->write_io[i]) { + err = -ENOMEM; + goto free_options; + } + + for (j = HOT; j < n; j++) { + init_rwsem(&sbi->write_io[i][j].io_rwsem); + sbi->write_io[i][j].sbi = sbi; + sbi->write_io[i][j].bio = NULL; + spin_lock_init(&sbi->write_io[i][j].io_lock); + INIT_LIST_HEAD(&sbi->write_io[i][j].io_list); + } } init_rwsem(&sbi->cp_rwsem); @@ -1710,12 +2376,21 @@ try_onemore: if (err) goto free_options; + if (F2FS_IO_SIZE(sbi) > 1) { + sbi->write_io_dummy = + mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0); + if (!sbi->write_io_dummy) { + err = -ENOMEM; + goto free_options; + } + } + /* get an inode for meta space */ sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); if (IS_ERR(sbi->meta_inode)) { f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode"); err = PTR_ERR(sbi->meta_inode); - goto free_options; + goto free_io_dummy; } err = get_valid_checkpoint(sbi); @@ -1724,6 +2399,13 @@ try_onemore: goto free_meta_inode; } + /* Initialize device list */ + err = f2fs_scan_devices(sbi); + if (err) { + f2fs_msg(sb, KERN_ERR, "Failed to find devices"); + goto free_devices; + } + sbi->total_valid_node_count = le32_to_cpu(sbi->ckpt->valid_node_count); percpu_counter_set(&sbi->total_valid_inode_count, @@ -1732,6 +2414,7 @@ try_onemore: sbi->total_valid_block_count = le64_to_cpu(sbi->ckpt->valid_block_count); sbi->last_valid_block_count = sbi->total_valid_block_count; + sbi->reserved_blocks = 0; for (i = 0; i < NR_INODE_TYPE; i++) { INIT_LIST_HEAD(&sbi->inode_list[i]); @@ -1779,10 +2462,9 @@ try_onemore: f2fs_join_shrinker(sbi); - /* if there are nt orphan nodes free them */ - err = recover_orphan_inodes(sbi); + err = f2fs_build_stats(sbi); if (err) - goto free_node_inode; + goto free_nm; /* read root inode and dentry */ root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); @@ -1803,26 +2485,14 @@ try_onemore: goto free_root_inode; } - err = f2fs_build_stats(sbi); + err = f2fs_register_sysfs(sbi); if (err) goto free_root_inode; - if (f2fs_proc_root) - sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); - - if (sbi->s_proc) { - proc_create_data("segment_info", S_IRUGO, sbi->s_proc, - &f2fs_seq_segment_info_fops, sb); - proc_create_data("segment_bits", S_IRUGO, sbi->s_proc, - &f2fs_seq_segment_bits_fops, sb); - } - - sbi->s_kobj.kset = f2fs_kset; - init_completion(&sbi->s_kobj_unregister); - err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, - "%s", sb->s_id); + /* if there are nt orphan nodes free them */ + err = recover_orphan_inodes(sbi); if (err) - goto free_proc; + goto free_sysfs; /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { @@ -1833,7 +2503,7 @@ try_onemore: if (bdev_read_only(sb->s_bdev) && !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { err = -EROFS; - goto free_kobj; + goto free_meta; } if (need_fsck) @@ -1847,7 +2517,7 @@ try_onemore: need_fsck = true; f2fs_msg(sb, KERN_ERR, "Cannot recover all fsync data errno=%d", err); - goto free_kobj; + goto free_meta; } } else { err = recover_fsync_data(sbi, true); @@ -1856,7 +2526,7 @@ try_onemore: err = -EINVAL; f2fs_msg(sb, KERN_ERR, "Need to recover fsync data"); - goto free_kobj; + goto free_sysfs; } } skip_recovery: @@ -1871,7 +2541,7 @@ skip_recovery: /* After POR, we can run background GC thread.*/ err = start_gc_thread(sbi); if (err) - goto free_kobj; + goto free_meta; } kfree(options); @@ -1883,22 +2553,23 @@ skip_recovery: sbi->valid_super_block ? 1 : 2, err); } + f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx", + cur_cp_version(F2FS_CKPT(sbi))); f2fs_update_time(sbi, CP_TIME); f2fs_update_time(sbi, REQ_TIME); return 0; -free_kobj: +free_meta: f2fs_sync_inode_meta(sbi); - kobject_del(&sbi->s_kobj); - kobject_put(&sbi->s_kobj); - wait_for_completion(&sbi->s_kobj_unregister); -free_proc: - if (sbi->s_proc) { - remove_proc_entry("segment_info", sbi->s_proc); - remove_proc_entry("segment_bits", sbi->s_proc); - remove_proc_entry(sb->s_id, f2fs_proc_root); - } - f2fs_destroy_stats(sbi); + /* + * Some dirty meta pages can be produced by recover_orphan_inodes() + * failed by EIO. Then, iput(node_inode) can trigger balance_fs_bg() + * followed by write_checkpoint() through f2fs_write_node_pages(), which + * falls into an infinite loop in sync_meta_pages(). + */ + truncate_inode_pages_final(META_MAPPING(sbi)); +free_sysfs: + f2fs_unregister_sysfs(sbi); free_root_inode: dput(sb->s_root); sb->s_root = NULL; @@ -1909,16 +2580,27 @@ free_node_inode: f2fs_leave_shrinker(sbi); iput(sbi->node_inode); mutex_unlock(&sbi->umount_mutex); + f2fs_destroy_stats(sbi); free_nm: destroy_node_manager(sbi); free_sm: destroy_segment_manager(sbi); +free_devices: + destroy_device_list(sbi); kfree(sbi->ckpt); free_meta_inode: make_bad_inode(sbi->meta_inode); iput(sbi->meta_inode); +free_io_dummy: + mempool_destroy(sbi->write_io_dummy); free_options: + for (i = 0; i < NR_PAGE_TYPE; i++) + kfree(sbi->write_io[i]); destroy_percpu_info(sbi); +#ifdef CONFIG_QUOTA + for (i = 0; i < MAXQUOTAS; i++) + kfree(sbi->s_qf_names[i]); +#endif kfree(options); free_sb_buf: kfree(raw_super); @@ -1944,8 +2626,11 @@ static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags, static void kill_f2fs_super(struct super_block *sb) { - if (sb->s_root) + if (sb->s_root) { set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE); + stop_gc_thread(F2FS_SB(sb)); + stop_discard_thread(F2FS_SB(sb)); + } kill_block_super(sb); } @@ -1999,30 +2684,26 @@ static int __init init_f2fs_fs(void) err = create_extent_cache(); if (err) goto free_checkpoint_caches; - f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); - if (!f2fs_kset) { - err = -ENOMEM; + err = f2fs_init_sysfs(); + if (err) goto free_extent_cache; - } err = register_shrinker(&f2fs_shrinker_info); if (err) - goto free_kset; - + goto free_sysfs; err = register_filesystem(&f2fs_fs_type); if (err) goto free_shrinker; err = f2fs_create_root_stats(); if (err) goto free_filesystem; - f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); return 0; free_filesystem: unregister_filesystem(&f2fs_fs_type); free_shrinker: unregister_shrinker(&f2fs_shrinker_info); -free_kset: - kset_unregister(f2fs_kset); +free_sysfs: + f2fs_exit_sysfs(); free_extent_cache: destroy_extent_cache(); free_checkpoint_caches: @@ -2039,11 +2720,10 @@ fail: static void __exit exit_f2fs_fs(void) { - remove_proc_entry("fs/f2fs", NULL); f2fs_destroy_root_stats(); unregister_filesystem(&f2fs_fs_type); unregister_shrinker(&f2fs_shrinker_info); - kset_unregister(f2fs_kset); + f2fs_exit_sysfs(); destroy_extent_cache(); destroy_checkpoint_caches(); destroy_segment_manager_caches(); @@ -2058,3 +2738,4 @@ module_exit(exit_f2fs_fs) MODULE_AUTHOR("Samsung Electronics's Praesto Team"); MODULE_DESCRIPTION("Flash Friendly File System"); MODULE_LICENSE("GPL"); + diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c new file mode 100644 index 000000000000..e2c258f717cd --- /dev/null +++ b/fs/f2fs/sysfs.c @@ -0,0 +1,556 @@ +/* + * f2fs sysfs interface + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * Copyright (c) 2017 Chao Yu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include + +#include "f2fs.h" +#include "segment.h" +#include "gc.h" + +static struct proc_dir_entry *f2fs_proc_root; + +/* Sysfs support for f2fs */ +enum { + GC_THREAD, /* struct f2fs_gc_thread */ + SM_INFO, /* struct f2fs_sm_info */ + DCC_INFO, /* struct discard_cmd_control */ + NM_INFO, /* struct f2fs_nm_info */ + F2FS_SBI, /* struct f2fs_sb_info */ +#ifdef CONFIG_F2FS_FAULT_INJECTION + FAULT_INFO_RATE, /* struct f2fs_fault_info */ + FAULT_INFO_TYPE, /* struct f2fs_fault_info */ +#endif + RESERVED_BLOCKS, +}; + +struct f2fs_attr { + struct attribute attr; + ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *); + ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *, + const char *, size_t); + int struct_type; + int offset; + int id; +}; + +static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) +{ + if (struct_type == GC_THREAD) + return (unsigned char *)sbi->gc_thread; + else if (struct_type == SM_INFO) + return (unsigned char *)SM_I(sbi); + else if (struct_type == DCC_INFO) + return (unsigned char *)SM_I(sbi)->dcc_info; + else if (struct_type == NM_INFO) + return (unsigned char *)NM_I(sbi); + else if (struct_type == F2FS_SBI || struct_type == RESERVED_BLOCKS) + return (unsigned char *)sbi; +#ifdef CONFIG_F2FS_FAULT_INJECTION + else if (struct_type == FAULT_INFO_RATE || + struct_type == FAULT_INFO_TYPE) + return (unsigned char *)&sbi->fault_info; +#endif + return NULL; +} + +static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + struct super_block *sb = sbi->sb; + + if (!sb->s_bdev->bd_part) + return snprintf(buf, PAGE_SIZE, "0\n"); + + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)(sbi->kbytes_written + + BD_PART_WRITTEN(sbi))); +} + +static ssize_t features_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + struct super_block *sb = sbi->sb; + int len = 0; + + if (!sb->s_bdev->bd_part) + return snprintf(buf, PAGE_SIZE, "0\n"); + + if (f2fs_sb_has_crypto(sb)) + len += snprintf(buf, PAGE_SIZE - len, "%s", + "encryption"); + if (f2fs_sb_mounted_blkzoned(sb)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "blkzoned"); + if (f2fs_sb_has_extra_attr(sb)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "extra_attr"); + if (f2fs_sb_has_project_quota(sb)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "projquota"); + if (f2fs_sb_has_inode_chksum(sb)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "inode_checksum"); + len += snprintf(buf + len, PAGE_SIZE - len, "\n"); + return len; +} + +static ssize_t f2fs_sbi_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + unsigned char *ptr = NULL; + unsigned int *ui; + + ptr = __struct_ptr(sbi, a->struct_type); + if (!ptr) + return -EINVAL; + + ui = (unsigned int *)(ptr + a->offset); + + return snprintf(buf, PAGE_SIZE, "%u\n", *ui); +} + +static ssize_t f2fs_sbi_store(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, + const char *buf, size_t count) +{ + unsigned char *ptr; + unsigned long t; + unsigned int *ui; + ssize_t ret; + + ptr = __struct_ptr(sbi, a->struct_type); + if (!ptr) + return -EINVAL; + + ui = (unsigned int *)(ptr + a->offset); + + ret = kstrtoul(skip_spaces(buf), 0, &t); + if (ret < 0) + return ret; +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX)) + return -EINVAL; +#endif + if (a->struct_type == RESERVED_BLOCKS) { + spin_lock(&sbi->stat_lock); + if ((unsigned long)sbi->total_valid_block_count + t > + (unsigned long)sbi->user_block_count) { + spin_unlock(&sbi->stat_lock); + return -EINVAL; + } + *ui = t; + spin_unlock(&sbi->stat_lock); + return count; + } + + if (!strcmp(a->attr.name, "discard_granularity")) { + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + int i; + + if (t == 0 || t > MAX_PLIST_NUM) + return -EINVAL; + if (t == *ui) + return count; + + mutex_lock(&dcc->cmd_lock); + for (i = 0; i < MAX_PLIST_NUM; i++) { + if (i >= t - 1) + dcc->pend_list_tag[i] |= P_ACTIVE; + else + dcc->pend_list_tag[i] &= (~P_ACTIVE); + } + mutex_unlock(&dcc->cmd_lock); + + *ui = t; + return count; + } + + *ui = t; + + if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0) + f2fs_reset_iostat(sbi); + if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) { + sbi->gc_thread->gc_wake = 1; + wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head); + wake_up_discard_thread(sbi, true); + } + + return count; +} + +static ssize_t f2fs_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); + + return a->show ? a->show(a, sbi, buf) : 0; +} + +static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); + + return a->store ? a->store(a, sbi, buf, len) : 0; +} + +static void f2fs_sb_release(struct kobject *kobj) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + complete(&sbi->s_kobj_unregister); +} + +enum feat_id { + FEAT_CRYPTO = 0, + FEAT_BLKZONED, + FEAT_ATOMIC_WRITE, + FEAT_EXTRA_ATTR, + FEAT_PROJECT_QUOTA, + FEAT_INODE_CHECKSUM, +}; + +static ssize_t f2fs_feature_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + switch (a->id) { + case FEAT_CRYPTO: + case FEAT_BLKZONED: + case FEAT_ATOMIC_WRITE: + case FEAT_EXTRA_ATTR: + case FEAT_PROJECT_QUOTA: + case FEAT_INODE_CHECKSUM: + return snprintf(buf, PAGE_SIZE, "supported\n"); + } + return 0; +} + +#define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \ +static struct f2fs_attr f2fs_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ + .struct_type = _struct_type, \ + .offset = _offset \ +} + +#define F2FS_RW_ATTR(struct_type, struct_name, name, elname) \ + F2FS_ATTR_OFFSET(struct_type, name, 0644, \ + f2fs_sbi_show, f2fs_sbi_store, \ + offsetof(struct struct_name, elname)) + +#define F2FS_GENERAL_RO_ATTR(name) \ +static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL) + +#define F2FS_FEATURE_RO_ATTR(_name, _id) \ +static struct f2fs_attr f2fs_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = 0444 }, \ + .show = f2fs_feature_show, \ + .id = _id, \ +} + +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent_sleep_time, + urgent_sleep_time); +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time); +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time); +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent, gc_urgent); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); +F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards); +F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity); +F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks); +F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); +F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); +F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); +#ifdef CONFIG_F2FS_FAULT_INJECTION +F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); +F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); +#endif +F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); +F2FS_GENERAL_RO_ATTR(features); + +#ifdef CONFIG_F2FS_FS_ENCRYPTION +F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO); +#endif +#ifdef CONFIG_BLK_DEV_ZONED +F2FS_FEATURE_RO_ATTR(block_zoned, FEAT_BLKZONED); +#endif +F2FS_FEATURE_RO_ATTR(atomic_write, FEAT_ATOMIC_WRITE); +F2FS_FEATURE_RO_ATTR(extra_attr, FEAT_EXTRA_ATTR); +F2FS_FEATURE_RO_ATTR(project_quota, FEAT_PROJECT_QUOTA); +F2FS_FEATURE_RO_ATTR(inode_checksum, FEAT_INODE_CHECKSUM); + +#define ATTR_LIST(name) (&f2fs_attr_##name.attr) +static struct attribute *f2fs_attrs[] = { + ATTR_LIST(gc_urgent_sleep_time), + ATTR_LIST(gc_min_sleep_time), + ATTR_LIST(gc_max_sleep_time), + ATTR_LIST(gc_no_gc_sleep_time), + ATTR_LIST(gc_idle), + ATTR_LIST(gc_urgent), + ATTR_LIST(reclaim_segments), + ATTR_LIST(max_small_discards), + ATTR_LIST(discard_granularity), + ATTR_LIST(batched_trim_sections), + ATTR_LIST(ipu_policy), + ATTR_LIST(min_ipu_util), + ATTR_LIST(min_fsync_blocks), + ATTR_LIST(min_hot_blocks), + ATTR_LIST(max_victim_search), + ATTR_LIST(dir_level), + ATTR_LIST(ram_thresh), + ATTR_LIST(ra_nid_pages), + ATTR_LIST(dirty_nats_ratio), + ATTR_LIST(cp_interval), + ATTR_LIST(idle_interval), + ATTR_LIST(iostat_enable), +#ifdef CONFIG_F2FS_FAULT_INJECTION + ATTR_LIST(inject_rate), + ATTR_LIST(inject_type), +#endif + ATTR_LIST(lifetime_write_kbytes), + ATTR_LIST(features), + ATTR_LIST(reserved_blocks), + NULL, +}; + +static struct attribute *f2fs_feat_attrs[] = { +#ifdef CONFIG_F2FS_FS_ENCRYPTION + ATTR_LIST(encryption), +#endif +#ifdef CONFIG_BLK_DEV_ZONED + ATTR_LIST(block_zoned), +#endif + ATTR_LIST(atomic_write), + ATTR_LIST(extra_attr), + ATTR_LIST(project_quota), + ATTR_LIST(inode_checksum), + NULL, +}; + +static const struct sysfs_ops f2fs_attr_ops = { + .show = f2fs_attr_show, + .store = f2fs_attr_store, +}; + +static struct kobj_type f2fs_sb_ktype = { + .default_attrs = f2fs_attrs, + .sysfs_ops = &f2fs_attr_ops, + .release = f2fs_sb_release, +}; + +static struct kobj_type f2fs_ktype = { + .sysfs_ops = &f2fs_attr_ops, +}; + +static struct kset f2fs_kset = { + .kobj = {.ktype = &f2fs_ktype}, +}; + +static struct kobj_type f2fs_feat_ktype = { + .default_attrs = f2fs_feat_attrs, + .sysfs_ops = &f2fs_attr_ops, +}; + +static struct kobject f2fs_feat = { + .kset = &f2fs_kset, +}; + +static int segment_info_seq_show(struct seq_file *seq, void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + unsigned int total_segs = + le32_to_cpu(sbi->raw_super->segment_count_main); + int i; + + seq_puts(seq, "format: segment_type|valid_blocks\n" + "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n"); + + for (i = 0; i < total_segs; i++) { + struct seg_entry *se = get_seg_entry(sbi, i); + + if ((i % 10) == 0) + seq_printf(seq, "%-10d", i); + seq_printf(seq, "%d|%-3u", se->type, + get_valid_blocks(sbi, i, false)); + if ((i % 10) == 9 || i == (total_segs - 1)) + seq_putc(seq, '\n'); + else + seq_putc(seq, ' '); + } + + return 0; +} + +static int segment_bits_seq_show(struct seq_file *seq, void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + unsigned int total_segs = + le32_to_cpu(sbi->raw_super->segment_count_main); + int i, j; + + seq_puts(seq, "format: segment_type|valid_blocks|bitmaps\n" + "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n"); + + for (i = 0; i < total_segs; i++) { + struct seg_entry *se = get_seg_entry(sbi, i); + + seq_printf(seq, "%-10d", i); + seq_printf(seq, "%d|%-3u|", se->type, + get_valid_blocks(sbi, i, false)); + for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++) + seq_printf(seq, " %.2x", se->cur_valid_map[j]); + seq_putc(seq, '\n'); + } + return 0; +} + +static int iostat_info_seq_show(struct seq_file *seq, void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + time64_t now = ktime_get_real_seconds(); + + if (!sbi->iostat_enable) + return 0; + + seq_printf(seq, "time: %-16llu\n", now); + + /* print app IOs */ + seq_printf(seq, "app buffered: %-16llu\n", + sbi->write_iostat[APP_BUFFERED_IO]); + seq_printf(seq, "app direct: %-16llu\n", + sbi->write_iostat[APP_DIRECT_IO]); + seq_printf(seq, "app mapped: %-16llu\n", + sbi->write_iostat[APP_MAPPED_IO]); + + /* print fs IOs */ + seq_printf(seq, "fs data: %-16llu\n", + sbi->write_iostat[FS_DATA_IO]); + seq_printf(seq, "fs node: %-16llu\n", + sbi->write_iostat[FS_NODE_IO]); + seq_printf(seq, "fs meta: %-16llu\n", + sbi->write_iostat[FS_META_IO]); + seq_printf(seq, "fs gc data: %-16llu\n", + sbi->write_iostat[FS_GC_DATA_IO]); + seq_printf(seq, "fs gc node: %-16llu\n", + sbi->write_iostat[FS_GC_NODE_IO]); + seq_printf(seq, "fs cp data: %-16llu\n", + sbi->write_iostat[FS_CP_DATA_IO]); + seq_printf(seq, "fs cp node: %-16llu\n", + sbi->write_iostat[FS_CP_NODE_IO]); + seq_printf(seq, "fs cp meta: %-16llu\n", + sbi->write_iostat[FS_CP_META_IO]); + seq_printf(seq, "fs discard: %-16llu\n", + sbi->write_iostat[FS_DISCARD]); + + return 0; +} + +#define F2FS_PROC_FILE_DEF(_name) \ +static int _name##_open_fs(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, _name##_seq_show, PDE_DATA(inode)); \ +} \ + \ +static const struct file_operations f2fs_seq_##_name##_fops = { \ + .open = _name##_open_fs, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ +}; + +F2FS_PROC_FILE_DEF(segment_info); +F2FS_PROC_FILE_DEF(segment_bits); +F2FS_PROC_FILE_DEF(iostat_info); + +int __init f2fs_init_sysfs(void) +{ + int ret; + + kobject_set_name(&f2fs_kset.kobj, "f2fs"); + f2fs_kset.kobj.parent = fs_kobj; + ret = kset_register(&f2fs_kset); + if (ret) + return ret; + + ret = kobject_init_and_add(&f2fs_feat, &f2fs_feat_ktype, + NULL, "features"); + if (ret) + kset_unregister(&f2fs_kset); + else + f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); + return ret; +} + +void f2fs_exit_sysfs(void) +{ + kobject_put(&f2fs_feat); + kset_unregister(&f2fs_kset); + remove_proc_entry("fs/f2fs", NULL); + f2fs_proc_root = NULL; +} + +int f2fs_register_sysfs(struct f2fs_sb_info *sbi) +{ + struct super_block *sb = sbi->sb; + int err; + + sbi->s_kobj.kset = &f2fs_kset; + init_completion(&sbi->s_kobj_unregister); + err = kobject_init_and_add(&sbi->s_kobj, &f2fs_sb_ktype, NULL, + "%s", sb->s_id); + if (err) + return err; + + if (f2fs_proc_root) + sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); + + if (sbi->s_proc) { + proc_create_data("segment_info", S_IRUGO, sbi->s_proc, + &f2fs_seq_segment_info_fops, sb); + proc_create_data("segment_bits", S_IRUGO, sbi->s_proc, + &f2fs_seq_segment_bits_fops, sb); + proc_create_data("iostat_info", S_IRUGO, sbi->s_proc, + &f2fs_seq_iostat_info_fops, sb); + } + return 0; +} + +void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi) +{ + if (sbi->s_proc) { + remove_proc_entry("iostat_info", sbi->s_proc); + remove_proc_entry("segment_info", sbi->s_proc); + remove_proc_entry("segment_bits", sbi->s_proc); + remove_proc_entry(sbi->sb->s_id, f2fs_proc_root); + } + kobject_del(&sbi->s_kobj); +} diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c index 73b4e1d1912a..bccbbf2616d2 100644 --- a/fs/f2fs/trace.c +++ b/fs/f2fs/trace.c @@ -59,7 +59,7 @@ void f2fs_trace_pid(struct page *page) pid_t pid = task_pid_nr(current); void *p; - page->private = pid; + set_page_private(page, (unsigned long)pid); if (radix_tree_preload(GFP_NOFS)) return; @@ -138,7 +138,7 @@ static unsigned int gang_lookup_pids(pid_t *results, unsigned long first_index, radix_tree_for_each_slot(slot, &pids, &iter, first_index) { results[ret] = iter.index; - if (++ret == PIDVEC_SIZE) + if (++ret == max_items) break; } return ret; diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 3e1c0280f866..7c65540148f8 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -106,7 +106,7 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler, return -EINVAL; F2FS_I(inode)->i_advise |= *(char *)value; - f2fs_mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode, true); return 0; } @@ -217,18 +217,123 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index, return entry; } +static struct f2fs_xattr_entry *__find_inline_xattr(void *base_addr, + void **last_addr, int index, + size_t len, const char *name) +{ + struct f2fs_xattr_entry *entry; + unsigned int inline_size = F2FS_INLINE_XATTR_ADDRS << 2; + + list_for_each_xattr(entry, base_addr) { + if ((void *)entry + sizeof(__u32) > base_addr + inline_size || + (void *)XATTR_NEXT_ENTRY(entry) + sizeof(__u32) > + base_addr + inline_size) { + *last_addr = entry; + return NULL; + } + if (entry->e_name_index != index) + continue; + if (entry->e_name_len != len) + continue; + if (!memcmp(entry->e_name, name, len)) + break; + } + return entry; +} + +static int lookup_all_xattrs(struct inode *inode, struct page *ipage, + unsigned int index, unsigned int len, + const char *name, struct f2fs_xattr_entry **xe, + void **base_addr) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + void *cur_addr, *txattr_addr, *last_addr = NULL; + nid_t xnid = F2FS_I(inode)->i_xattr_nid; + unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0; + unsigned int inline_size = inline_xattr_size(inode); + int err = 0; + + if (!size && !inline_size) + return -ENODATA; + + txattr_addr = kzalloc(inline_size + size + XATTR_PADDING_SIZE, + GFP_F2FS_ZERO); + if (!txattr_addr) + return -ENOMEM; + + /* read from inline xattr */ + if (inline_size) { + struct page *page = NULL; + void *inline_addr; + + if (ipage) { + inline_addr = inline_xattr_addr(ipage); + } else { + page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto out; + } + inline_addr = inline_xattr_addr(page); + } + memcpy(txattr_addr, inline_addr, inline_size); + f2fs_put_page(page, 1); + + *xe = __find_inline_xattr(txattr_addr, &last_addr, + index, len, name); + if (*xe) + goto check; + } + + /* read from xattr node block */ + if (xnid) { + struct page *xpage; + void *xattr_addr; + + /* The inode already has an extended attribute block. */ + xpage = get_node_page(sbi, xnid); + if (IS_ERR(xpage)) { + err = PTR_ERR(xpage); + goto out; + } + + xattr_addr = page_address(xpage); + memcpy(txattr_addr + inline_size, xattr_addr, size); + f2fs_put_page(xpage, 1); + } + + if (last_addr) + cur_addr = XATTR_HDR(last_addr) - 1; + else + cur_addr = txattr_addr; + + *xe = __find_xattr(cur_addr, index, len, name); +check: + if (IS_XATTR_LAST_ENTRY(*xe)) { + err = -ENODATA; + goto out; + } + + *base_addr = txattr_addr; + return 0; +out: + kzfree(txattr_addr); + return err; +} + static int read_all_xattrs(struct inode *inode, struct page *ipage, void **base_addr) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_xattr_header *header; - size_t size = PAGE_SIZE, inline_size = 0; + nid_t xnid = F2FS_I(inode)->i_xattr_nid; + unsigned int size = VALID_XATTR_BLOCK_SIZE; + unsigned int inline_size = inline_xattr_size(inode); void *txattr_addr; int err; - inline_size = inline_xattr_size(inode); - - txattr_addr = kzalloc(inline_size + size, GFP_F2FS_ZERO); + txattr_addr = kzalloc(inline_size + size + XATTR_PADDING_SIZE, + GFP_F2FS_ZERO); if (!txattr_addr) return -ENOMEM; @@ -252,19 +357,19 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage, } /* read from xattr node block */ - if (F2FS_I(inode)->i_xattr_nid) { + if (xnid) { struct page *xpage; void *xattr_addr; /* The inode already has an extended attribute block. */ - xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); + xpage = get_node_page(sbi, xnid); if (IS_ERR(xpage)) { err = PTR_ERR(xpage); goto fail; } xattr_addr = page_address(xpage); - memcpy(txattr_addr + inline_size, xattr_addr, PAGE_SIZE); + memcpy(txattr_addr + inline_size, xattr_addr, size); f2fs_put_page(xpage, 1); } @@ -286,14 +391,12 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, void *txattr_addr, struct page *ipage) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - size_t inline_size = 0; + size_t inline_size = inline_xattr_size(inode); void *xattr_addr; struct page *xpage; nid_t new_nid = 0; int err; - inline_size = inline_xattr_size(inode); - if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid) if (!alloc_nid(sbi, &new_nid)) return -ENOSPC; @@ -339,7 +442,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, } else { struct dnode_of_data dn; set_new_dnode(&dn, inode, NULL, NULL, new_nid); - xpage = new_node_page(&dn, XATTR_NODE_OFFSET, ipage); + xpage = new_node_page(&dn, XATTR_NODE_OFFSET); if (IS_ERR(xpage)) { alloc_nid_failed(sbi, new_nid); return PTR_ERR(xpage); @@ -348,23 +451,20 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, } xattr_addr = page_address(xpage); - memcpy(xattr_addr, txattr_addr + inline_size, PAGE_SIZE - - sizeof(struct node_footer)); + memcpy(xattr_addr, txattr_addr + inline_size, VALID_XATTR_BLOCK_SIZE); set_page_dirty(xpage); f2fs_put_page(xpage, 1); - /* need to checkpoint during fsync */ - F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); return 0; } int f2fs_getxattr(struct inode *inode, int index, const char *name, void *buffer, size_t buffer_size, struct page *ipage) { - struct f2fs_xattr_entry *entry; - void *base_addr; + struct f2fs_xattr_entry *entry = NULL; int error = 0; - size_t size, len; + unsigned int size, len; + void *base_addr = NULL; if (name == NULL) return -EINVAL; @@ -373,21 +473,18 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, if (len > F2FS_NAME_LEN) return -ERANGE; - error = read_all_xattrs(inode, ipage, &base_addr); + down_read(&F2FS_I(inode)->i_xattr_sem); + error = lookup_all_xattrs(inode, ipage, index, len, name, + &entry, &base_addr); + up_read(&F2FS_I(inode)->i_xattr_sem); if (error) return error; - entry = __find_xattr(base_addr, index, len, name); - if (IS_XATTR_LAST_ENTRY(entry)) { - error = -ENODATA; - goto cleanup; - } - size = le16_to_cpu(entry->e_value_size); if (buffer && size > buffer_size) { error = -ERANGE; - goto cleanup; + goto out; } if (buffer) { @@ -395,8 +492,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, memcpy(buffer, pval, size); } error = size; - -cleanup: +out: kzfree(base_addr); return error; } @@ -409,7 +505,9 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) int error = 0; size_t rest = buffer_size; + down_read(&F2FS_I(inode)->i_xattr_sem); error = read_all_xattrs(inode, NULL, &base_addr); + up_read(&F2FS_I(inode)->i_xattr_sem); if (error) return error; @@ -445,6 +543,15 @@ cleanup: return error; } +static bool f2fs_xattr_value_same(struct f2fs_xattr_entry *entry, + const void *value, size_t size) +{ + void *pval = entry->e_name + entry->e_name_len; + + return (le16_to_cpu(entry->e_value_size) == size) && + !memcmp(pval, value, size); +} + static int __f2fs_setxattr(struct inode *inode, int index, const char *name, const void *value, size_t size, struct page *ipage, int flags) @@ -479,12 +586,17 @@ static int __f2fs_setxattr(struct inode *inode, int index, found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1; - if ((flags & XATTR_REPLACE) && !found) { + if (found) { + if ((flags & XATTR_CREATE)) { + error = -EEXIST; + goto exit; + } + + if (f2fs_xattr_value_same(here, value, size)) + goto exit; + } else if ((flags & XATTR_REPLACE)) { error = -ENODATA; goto exit; - } else if ((flags & XATTR_CREATE) && found) { - error = -EEXIST; - goto exit; } last = here; @@ -554,7 +666,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, if (index == F2FS_XATTR_INDEX_ENCRYPTION && !strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT)) f2fs_set_encrypted_inode(inode); - f2fs_mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode, true); if (!error && S_ISDIR(inode->i_mode)) set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP); exit: @@ -578,7 +690,9 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, f2fs_lock_op(sbi); /* protect xattr_ver */ down_write(&F2FS_I(inode)->i_sem); + down_write(&F2FS_I(inode)->i_xattr_sem); err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags); + up_write(&F2FS_I(inode)->i_xattr_sem); up_write(&F2FS_I(inode)->i_sem); f2fs_unlock_op(sbi); diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index f990de20cdcd..dbcd1d16e669 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -58,10 +58,10 @@ struct f2fs_xattr_entry { #define XATTR_FIRST_ENTRY(ptr) (XATTR_ENTRY(XATTR_HDR(ptr) + 1)) #define XATTR_ROUND (3) -#define XATTR_ALIGN(size) ((size + XATTR_ROUND) & ~XATTR_ROUND) +#define XATTR_ALIGN(size) (((size) + XATTR_ROUND) & ~XATTR_ROUND) #define ENTRY_SIZE(entry) (XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + \ - entry->e_name_len + le16_to_cpu(entry->e_value_size))) + (entry)->e_name_len + le16_to_cpu((entry)->e_value_size))) #define XATTR_NEXT_ENTRY(entry) ((struct f2fs_xattr_entry *)((char *)(entry) +\ ENTRY_SIZE(entry))) @@ -72,9 +72,10 @@ struct f2fs_xattr_entry { for (entry = XATTR_FIRST_ENTRY(addr);\ !IS_XATTR_LAST_ENTRY(entry);\ entry = XATTR_NEXT_ENTRY(entry)) - -#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + PAGE_SIZE - \ - sizeof(struct node_footer) - sizeof(__u32)) +#define VALID_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer)) +#define XATTR_PADDING_SIZE (sizeof(__u32)) +#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + \ + VALID_XATTR_BLOCK_SIZE) #define MAX_VALUE_LEN(i) (MIN_OFFSET(i) - \ sizeof(struct f2fs_xattr_header) - \ diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index 5d5ddaa84b21..5eb2e24ce790 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c @@ -329,7 +329,14 @@ static void fscache_objlist_config(struct fscache_objlist_data *data) config = 0; rcu_read_lock(); - confkey = user_key_payload(key); + confkey = user_key_payload_rcu(key); + if (!confkey) { + /* key was revoked */ + rcu_read_unlock(); + key_put(key); + goto no_config; + } + buf = confkey->data; for (len = confkey->datalen - 1; len >= 0; len--) { diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 7bff6f46f5da..7a8b1d72e3d9 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1820,29 +1820,27 @@ void gfs2_glock_exit(void) static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi) { - do { - gi->gl = rhashtable_walk_next(&gi->hti); + while ((gi->gl = rhashtable_walk_next(&gi->hti))) { if (IS_ERR(gi->gl)) { if (PTR_ERR(gi->gl) == -EAGAIN) continue; gi->gl = NULL; + return; } - /* Skip entries for other sb and dead entries */ - } while ((gi->gl) && ((gi->sdp != gi->gl->gl_name.ln_sbd) || - __lockref_is_dead(&gi->gl->gl_lockref))); + /* Skip entries for other sb and dead entries */ + if (gi->sdp == gi->gl->gl_name.ln_sbd && + !__lockref_is_dead(&gi->gl->gl_lockref)) + return; + } } static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) { struct gfs2_glock_iter *gi = seq->private; loff_t n = *pos; - int ret; - if (gi->last_pos <= *pos) - n = (*pos - gi->last_pos); - - ret = rhashtable_walk_start(&gi->hti); - if (ret) + rhashtable_walk_enter(&gl_hash_table, &gi->hti); + if (rhashtable_walk_start(&gi->hti) != 0) return NULL; do { @@ -1850,6 +1848,7 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) } while (gi->gl && n--); gi->last_pos = *pos; + return gi->gl; } @@ -1861,6 +1860,7 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr, (*pos)++; gi->last_pos = *pos; gfs2_glock_iter_next(gi); + return gi->gl; } @@ -1870,6 +1870,7 @@ static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) gi->gl = NULL; rhashtable_walk_stop(&gi->hti); + rhashtable_walk_exit(&gi->hti); } static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) @@ -1932,12 +1933,10 @@ static int gfs2_glocks_open(struct inode *inode, struct file *file) struct gfs2_glock_iter *gi = seq->private; gi->sdp = inode->i_private; - gi->last_pos = 0; seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); if (seq->buf) seq->size = GFS2_SEQ_GOODSIZE; gi->gl = NULL; - ret = rhashtable_walk_init(&gl_hash_table, &gi->hti, GFP_KERNEL); } return ret; } @@ -1948,7 +1947,6 @@ static int gfs2_glocks_release(struct inode *inode, struct file *file) struct gfs2_glock_iter *gi = seq->private; gi->gl = NULL; - rhashtable_walk_exit(&gi->hti); return seq_release_private(inode, file); } @@ -1960,12 +1958,10 @@ static int gfs2_glstats_open(struct inode *inode, struct file *file) struct seq_file *seq = file->private_data; struct gfs2_glock_iter *gi = seq->private; gi->sdp = inode->i_private; - gi->last_pos = 0; seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); if (seq->buf) seq->size = GFS2_SEQ_GOODSIZE; gi->gl = NULL; - ret = rhashtable_walk_init(&gl_hash_table, &gi->hti, GFP_KERNEL); } return ret; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 704fa0b17309..2c2f182cde03 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -695,14 +695,11 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb, inode = new_inode(sb); if (inode) { - struct hugetlbfs_inode_info *info; inode->i_ino = get_next_ino(); inode->i_mode = S_IFDIR | config->mode; inode->i_uid = config->uid; inode->i_gid = config->gid; inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); - info = HUGETLBFS_I(inode); - mpol_shared_policy_init(&info->policy, NULL); inode->i_op = &hugetlbfs_dir_inode_operations; inode->i_fop = &simple_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ @@ -733,7 +730,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, inode = new_inode(sb); if (inode) { - struct hugetlbfs_inode_info *info; inode->i_ino = get_next_ino(); inode_init_owner(inode, dir, mode); lockdep_set_class(&inode->i_mapping->i_mmap_rwsem, @@ -741,15 +737,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, inode->i_mapping->a_ops = &hugetlbfs_aops; inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); inode->i_mapping->private_data = resv_map; - info = HUGETLBFS_I(inode); - /* - * The policy is initialized here even if we are creating a - * private inode because initialization simply creates an - * an empty rb tree and calls rwlock_init(), later when we - * call mpol_free_shared_policy() it will just return because - * the rb tree will still be empty. - */ - mpol_shared_policy_init(&info->policy, NULL); switch (mode & S_IFMT) { default: init_special_inode(inode, mode, dev); @@ -937,6 +924,18 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) hugetlbfs_inc_free_inodes(sbinfo); return NULL; } + + /* + * Any time after allocation, hugetlbfs_destroy_inode can be called + * for the inode. mpol_free_shared_policy is unconditionally called + * as part of hugetlbfs_destroy_inode. So, initialize policy here + * in case of a quick call to destroy. + * + * Note that the policy is initialized even if we are creating a + * private inode. This simplifies hugetlbfs_destroy_inode. + */ + mpol_shared_policy_init(&p->policy, NULL); + return &p->vfs_inode; } diff --git a/fs/mpage.c b/fs/mpage.c index 1193d43451fc..d4e17c88ce08 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -502,6 +502,16 @@ static void clean_buffers(struct page *page, unsigned first_unmapped) try_to_free_buffers(page); } +/* + * For situations where we want to clean all buffers attached to a page. + * We don't need to calculate how many buffers are attached to the page, + * we just need to specify a number larger than the maximum number of buffers. + */ +void clean_page_buffers(struct page *page) +{ + clean_buffers(page, ~0U); +} + static int __mpage_writepage(struct page *page, struct writeback_control *wbc, void *data) { @@ -640,10 +650,8 @@ alloc_new: if (bio == NULL) { if (first_unmapped == blocks_per_page) { if (!bdev_write_page(bdev, blocks[0] << (blkbits - 9), - page, wbc)) { - clean_buffers(page, first_unmapped); + page, wbc)) goto out; - } } bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9), BIO_MAX_PAGES, GFP_NOFS|__GFP_HIGH); diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 0a2115084c3f..af84a92cb142 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -75,7 +75,10 @@ nfs4_callback_svc(void *vrqstp) set_freezable(); - while (!kthread_should_stop()) { + while (!kthread_freezable_should_stop(NULL)) { + + if (signal_pending(current)) + flush_signals(current); /* * Listen for a request on the socket */ @@ -84,6 +87,8 @@ nfs4_callback_svc(void *vrqstp) continue; svc_process(rqstp); } + svc_exit_thread(rqstp); + module_put_and_exit(0); return 0; } @@ -102,9 +107,10 @@ nfs41_callback_svc(void *vrqstp) set_freezable(); - while (!kthread_should_stop()) { - if (try_to_freeze()) - continue; + while (!kthread_freezable_should_stop(NULL)) { + + if (signal_pending(current)) + flush_signals(current); prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE); spin_lock_bh(&serv->sv_cb_lock); @@ -120,11 +126,13 @@ nfs41_callback_svc(void *vrqstp) error); } else { spin_unlock_bh(&serv->sv_cb_lock); - schedule(); + if (!kthread_should_stop()) + schedule(); finish_wait(&serv->sv_cb_waitq, &wq); } - flush_signals(current); } + svc_exit_thread(rqstp); + module_put_and_exit(0); return 0; } @@ -220,23 +228,23 @@ err_bind: static struct svc_serv_ops nfs40_cb_sv_ops = { .svo_function = nfs4_callback_svc, .svo_enqueue_xprt = svc_xprt_do_enqueue, - .svo_setup = svc_set_num_threads, + .svo_setup = svc_set_num_threads_sync, .svo_module = THIS_MODULE, }; #if defined(CONFIG_NFS_V4_1) static struct svc_serv_ops nfs41_cb_sv_ops = { .svo_function = nfs41_callback_svc, .svo_enqueue_xprt = svc_xprt_do_enqueue, - .svo_setup = svc_set_num_threads, + .svo_setup = svc_set_num_threads_sync, .svo_module = THIS_MODULE, }; -struct svc_serv_ops *nfs4_cb_sv_ops[] = { +static struct svc_serv_ops *nfs4_cb_sv_ops[] = { [0] = &nfs40_cb_sv_ops, [1] = &nfs41_cb_sv_ops, }; #else -struct svc_serv_ops *nfs4_cb_sv_ops[] = { +static struct svc_serv_ops *nfs4_cb_sv_ops[] = { [0] = &nfs40_cb_sv_ops, [1] = NULL, }; diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index c444285bb1b1..835c163f61af 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -316,7 +316,7 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, if (ret < 0) goto out_up; - payload = user_key_payload(rkey); + payload = user_key_payload_rcu(rkey); if (IS_ERR_OR_NULL(payload)) { ret = PTR_ERR(payload); goto out_up; diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 211dc2aed8e1..3069cd46ea66 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -753,6 +753,14 @@ int set_callback_cred(void) return 0; } +void cleanup_callback_cred(void) +{ + if (callback_cred) { + put_rpccred(callback_cred); + callback_cred = NULL; + } +} + static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses) { if (clp->cl_minorversion == 0) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a0dee8ae9f97..d35eb077330f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -7012,23 +7012,24 @@ nfs4_state_start(void) ret = set_callback_cred(); if (ret) - return -ENOMEM; + return ret; + laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4"); if (laundry_wq == NULL) { ret = -ENOMEM; - goto out_recovery; + goto out_cleanup_cred; } ret = nfsd4_create_callback_queue(); if (ret) goto out_free_laundry; set_max_delegations(); - return 0; out_free_laundry: destroy_workqueue(laundry_wq); -out_recovery: +out_cleanup_cred: + cleanup_callback_cred(); return ret; } @@ -7086,6 +7087,7 @@ nfs4_state_shutdown(void) { destroy_workqueue(laundry_wq); nfsd4_destroy_callback_queue(); + cleanup_callback_cred(); } static void diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 4516e8b7d776..005c911b34ac 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -615,6 +615,7 @@ extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, extern __be32 nfs4_check_open_reclaim(clientid_t *clid, struct nfsd4_compound_state *cstate, struct nfsd_net *nn); extern int set_callback_cred(void); +extern void cleanup_callback_cred(void); extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 77d1632e905d..8dce4099a6ca 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -532,6 +532,7 @@ void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) init_waitqueue_head(&res->l_event); INIT_LIST_HEAD(&res->l_blocked_list); INIT_LIST_HEAD(&res->l_mask_waiters); + INIT_LIST_HEAD(&res->l_holders); } void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, @@ -749,6 +750,50 @@ void ocfs2_lock_res_free(struct ocfs2_lock_res *res) res->l_flags = 0UL; } +/* + * Keep a list of processes who have interest in a lockres. + * Note: this is now only uesed for check recursive cluster locking. + */ +static inline void ocfs2_add_holder(struct ocfs2_lock_res *lockres, + struct ocfs2_lock_holder *oh) +{ + INIT_LIST_HEAD(&oh->oh_list); + oh->oh_owner_pid = get_pid(task_pid(current)); + + spin_lock(&lockres->l_lock); + list_add_tail(&oh->oh_list, &lockres->l_holders); + spin_unlock(&lockres->l_lock); +} + +static inline void ocfs2_remove_holder(struct ocfs2_lock_res *lockres, + struct ocfs2_lock_holder *oh) +{ + spin_lock(&lockres->l_lock); + list_del(&oh->oh_list); + spin_unlock(&lockres->l_lock); + + put_pid(oh->oh_owner_pid); +} + +static inline int ocfs2_is_locked_by_me(struct ocfs2_lock_res *lockres) +{ + struct ocfs2_lock_holder *oh; + struct pid *pid; + + /* look in the list of holders for one with the current task as owner */ + spin_lock(&lockres->l_lock); + pid = task_pid(current); + list_for_each_entry(oh, &lockres->l_holders, oh_list) { + if (oh->oh_owner_pid == pid) { + spin_unlock(&lockres->l_lock); + return 1; + } + } + spin_unlock(&lockres->l_lock); + + return 0; +} + static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, int level) { @@ -2333,8 +2378,9 @@ int ocfs2_inode_lock_full_nested(struct inode *inode, goto getbh; } - if (ocfs2_mount_local(osb)) - goto local; + if ((arg_flags & OCFS2_META_LOCK_GETBH) || + ocfs2_mount_local(osb)) + goto update; if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) ocfs2_wait_for_recovery(osb); @@ -2363,7 +2409,7 @@ int ocfs2_inode_lock_full_nested(struct inode *inode, if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) ocfs2_wait_for_recovery(osb); -local: +update: /* * We only see this flag if we're being called from * ocfs2_read_locked_inode(). It means we're locking an inode @@ -2497,6 +2543,59 @@ void ocfs2_inode_unlock(struct inode *inode, ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); } +/* + * This _tracker variantes are introduced to deal with the recursive cluster + * locking issue. The idea is to keep track of a lock holder on the stack of + * the current process. If there's a lock holder on the stack, we know the + * task context is already protected by cluster locking. Currently, they're + * used in some VFS entry routines. + * + * return < 0 on error, return == 0 if there's no lock holder on the stack + * before this call, return == 1 if this call would be a recursive locking. + */ +int ocfs2_inode_lock_tracker(struct inode *inode, + struct buffer_head **ret_bh, + int ex, + struct ocfs2_lock_holder *oh) +{ + int status; + int arg_flags = 0, has_locked; + struct ocfs2_lock_res *lockres; + + lockres = &OCFS2_I(inode)->ip_inode_lockres; + has_locked = ocfs2_is_locked_by_me(lockres); + /* Just get buffer head if the cluster lock has been taken */ + if (has_locked) + arg_flags = OCFS2_META_LOCK_GETBH; + + if (likely(!has_locked || ret_bh)) { + status = ocfs2_inode_lock_full(inode, ret_bh, ex, arg_flags); + if (status < 0) { + if (status != -ENOENT) + mlog_errno(status); + return status; + } + } + if (!has_locked) + ocfs2_add_holder(lockres, oh); + + return has_locked; +} + +void ocfs2_inode_unlock_tracker(struct inode *inode, + int ex, + struct ocfs2_lock_holder *oh, + int had_lock) +{ + struct ocfs2_lock_res *lockres; + + lockres = &OCFS2_I(inode)->ip_inode_lockres; + if (!had_lock) { + ocfs2_remove_holder(lockres, oh); + ocfs2_inode_unlock(inode, ex); + } +} + int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno) { struct ocfs2_lock_res *lockres; diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index d293a22c32c5..a7fc18ba0dc1 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h @@ -70,6 +70,11 @@ struct ocfs2_orphan_scan_lvb { __be32 lvb_os_seqno; }; +struct ocfs2_lock_holder { + struct list_head oh_list; + struct pid *oh_owner_pid; +}; + /* ocfs2_inode_lock_full() 'arg_flags' flags */ /* don't wait on recovery. */ #define OCFS2_META_LOCK_RECOVERY (0x01) @@ -77,6 +82,8 @@ struct ocfs2_orphan_scan_lvb { #define OCFS2_META_LOCK_NOQUEUE (0x02) /* don't block waiting for the downconvert thread, instead return -EAGAIN */ #define OCFS2_LOCK_NONBLOCK (0x04) +/* just get back disk inode bh if we've got cluster lock. */ +#define OCFS2_META_LOCK_GETBH (0x08) /* Locking subclasses of inode cluster lock */ enum { @@ -170,4 +177,15 @@ void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug); /* To set the locking protocol on module initialization */ void ocfs2_set_locking_protocol(void); + +/* The _tracker pair is used to avoid cluster recursive locking */ +int ocfs2_inode_lock_tracker(struct inode *inode, + struct buffer_head **ret_bh, + int ex, + struct ocfs2_lock_holder *oh); +void ocfs2_inode_unlock_tracker(struct inode *inode, + int ex, + struct ocfs2_lock_holder *oh, + int had_lock); + #endif /* DLMGLUE_H */ diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index e63af7ddfe68..594575e380e8 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -172,6 +172,7 @@ struct ocfs2_lock_res { struct list_head l_blocked_list; struct list_head l_mask_waiters; + struct list_head l_holders; unsigned long l_flags; char l_name[OCFS2_LOCK_ID_MAX_LEN]; diff --git a/fs/orangefs/acl.c b/fs/orangefs/acl.c index 7a3754488312..9409aac232f7 100644 --- a/fs/orangefs/acl.c +++ b/fs/orangefs/acl.c @@ -61,9 +61,9 @@ struct posix_acl *orangefs_get_acl(struct inode *inode, int type) return acl; } -int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type) +static int __orangefs_set_acl(struct inode *inode, struct posix_acl *acl, + int type) { - struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); int error = 0; void *value = NULL; size_t size = 0; @@ -72,22 +72,6 @@ int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type) switch (type) { case ACL_TYPE_ACCESS: name = XATTR_NAME_POSIX_ACL_ACCESS; - if (acl) { - umode_t mode; - - error = posix_acl_update_mode(inode, &mode, &acl); - if (error) { - gossip_err("%s: posix_acl_update_mode err: %d\n", - __func__, - error); - return error; - } - - if (inode->i_mode != mode) - SetModeFlag(orangefs_inode); - inode->i_mode = mode; - mark_inode_dirty_sync(inode); - } break; case ACL_TYPE_DEFAULT: name = XATTR_NAME_POSIX_ACL_DEFAULT; @@ -132,6 +116,29 @@ out: return error; } +int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type) +{ + int error; + + if (type == ACL_TYPE_ACCESS && acl) { + umode_t mode; + + error = posix_acl_update_mode(inode, &mode, &acl); + if (error) { + gossip_err("%s: posix_acl_update_mode err: %d\n", + __func__, + error); + return error; + } + + if (inode->i_mode != mode) + SetModeFlag(ORANGEFS_I(inode)); + inode->i_mode = mode; + mark_inode_dirty_sync(inode); + } + return __orangefs_set_acl(inode, acl, type); +} + int orangefs_init_acl(struct inode *inode, struct inode *dir) { struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); @@ -146,13 +153,14 @@ int orangefs_init_acl(struct inode *inode, struct inode *dir) return error; if (default_acl) { - error = orangefs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); + error = __orangefs_set_acl(inode, default_acl, + ACL_TYPE_DEFAULT); posix_acl_release(default_acl); } if (acl) { if (!error) - error = orangefs_set_acl(inode, acl, ACL_TYPE_ACCESS); + error = __orangefs_set_acl(inode, acl, ACL_TYPE_ACCESS); posix_acl_release(acl); } diff --git a/fs/proc/array.c b/fs/proc/array.c index 81818adb8e9e..c932ec454625 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include #include @@ -416,7 +417,15 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, * esp and eip are intentionally zeroed out. There is no * non-racy way to read them without freezing the task. * Programs that need reliable values can use ptrace(2). + * + * The only exception is if the task is core dumping because + * a program is not able to use ptrace(2) in that case. It is + * safe because the task has stopped executing permanently. */ + if (permitted && (task->flags & PF_DUMPCORE)) { + eip = KSTK_EIP(task); + esp = KSTK_ESP(task); + } } get_task_comm(tcomm, task); diff --git a/fs/read_write.c b/fs/read_write.c index e479e24dcd4c..ba280596ec78 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -114,7 +114,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence, * In the generic case the entire file is data, so as long as * offset isn't at the end of the file then the offset is data. */ - if (offset >= eof) + if ((unsigned long long)offset >= eof) return -ENXIO; break; case SEEK_HOLE: @@ -122,7 +122,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence, * There is a virtual hole at the end of the file, so as long as * offset isn't i_size or larger, return i_size. */ - if (offset >= eof) + if ((unsigned long long)offset >= eof) return -ENXIO; offset = eof; break; @@ -1518,6 +1518,11 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, if (flags != 0) return -EINVAL; + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) + return -EISDIR; + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) + return -EINVAL; + ret = rw_verify_area(READ, file_in, &pos_in, len); if (unlikely(ret)) return ret; diff --git a/fs/xattr.c b/fs/xattr.c index ed8c374570ed..932b9061a3a2 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -249,7 +249,7 @@ xattr_getsecurity(struct inode *inode, const char *name, void *value, } memcpy(value, buffer, len); out: - security_release_secctx(buffer, len); + kfree(buffer); out_noalloc: return len; } diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 339c696bbc01..bb2beaef531a 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -24,24 +24,6 @@ #include "kmem.h" #include "xfs_message.h" -/* - * Greedy allocation. May fail and may return vmalloced memory. - */ -void * -kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize) -{ - void *ptr; - size_t kmsize = maxsize; - - while (!(ptr = vzalloc(kmsize))) { - if ((kmsize >>= 1) <= minsize) - kmsize = minsize; - } - if (ptr) - *size = kmsize; - return ptr; -} - void * kmem_alloc(size_t size, xfs_km_flags_t flags) { diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 689f746224e7..f0fc84fcaac2 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -69,8 +69,6 @@ static inline void kmem_free(const void *ptr) } -extern void *kmem_zalloc_greedy(size_t *, size_t, size_t); - static inline void * kmem_zalloc(size_t size, xfs_km_flags_t flags) { diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index 33db69be4832..eed8f5867c46 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -157,7 +157,8 @@ __xfs_ag_resv_free( trace_xfs_ag_resv_free(pag, type, 0); resv = xfs_perag_resv(pag, type); - pag->pag_mount->m_ag_max_usable += resv->ar_asked; + if (pag->pag_agno == 0) + pag->pag_mount->m_ag_max_usable += resv->ar_asked; /* * AGFL blocks are always considered "free", so whatever * was reserved at mount time must be given back at umount. @@ -217,7 +218,14 @@ __xfs_ag_resv_init( return error; } - mp->m_ag_max_usable -= ask; + /* + * Reduce the maximum per-AG allocation length by however much we're + * trying to reserve for an AG. Since this is a filesystem-wide + * counter, we only make the adjustment for AG 0. This assumes that + * there aren't any AGs hungrier for per-AG reservation than AG 0. + */ + if (pag->pag_agno == 0) + mp->m_ag_max_usable -= ask; resv = xfs_perag_resv(pag, type); resv->ar_asked = ask; diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 9f06a211e157..c3702cda010a 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -1579,6 +1579,10 @@ xfs_alloc_ag_vextent_small( bp = xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno, 0); + if (!bp) { + error = -EFSCORRUPTED; + goto error0; + } xfs_trans_binval(args->tp, bp); } args->len = 1; @@ -2136,6 +2140,10 @@ xfs_alloc_fix_freelist( if (error) goto out_agbp_relse; bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0); + if (!bp) { + error = -EFSCORRUPTED; + goto out_agbp_relse; + } xfs_trans_binval(tp, bp); } diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index d2f4ab175096..7eb99701054f 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4057,6 +4057,17 @@ xfs_trim_extent( } } +/* trim extent to within eof */ +void +xfs_trim_extent_eof( + struct xfs_bmbt_irec *irec, + struct xfs_inode *ip) + +{ + xfs_trim_extent(irec, 0, XFS_B_TO_FSB(ip->i_mount, + i_size_read(VFS_I(ip)))); +} + /* * Trim the returned map to the required bounds */ diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index db53ac7ff6df..f1446d127120 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -196,6 +196,7 @@ void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, xfs_filblks_t len); +void xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *); int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops, diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 083cdd6d6c28..ce6958b1385c 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -270,6 +270,7 @@ typedef struct xfs_inode_log_format { __uint32_t ilf_fields; /* flags for fields logged */ __uint16_t ilf_asize; /* size of attr d/ext/root */ __uint16_t ilf_dsize; /* size of data/ext/root */ + __uint32_t ilf_pad; /* pad for 64 bit boundary */ __uint64_t ilf_ino; /* inode number */ union { __uint32_t ilfu_rdev; /* rdev value for dev inode*/ @@ -280,7 +281,12 @@ typedef struct xfs_inode_log_format { __int32_t ilf_boffset; /* off of inode in buffer */ } xfs_inode_log_format_t; -typedef struct xfs_inode_log_format_32 { +/* + * Old 32 bit systems will log in this format without the 64 bit + * alignment padding. Recovery will detect this and convert it to the + * correct format. + */ +struct xfs_inode_log_format_32 { __uint16_t ilf_type; /* inode log item type */ __uint16_t ilf_size; /* size of this item */ __uint32_t ilf_fields; /* flags for fields logged */ @@ -294,24 +300,7 @@ typedef struct xfs_inode_log_format_32 { __int64_t ilf_blkno; /* blkno of inode buffer */ __int32_t ilf_len; /* len of inode buffer */ __int32_t ilf_boffset; /* off of inode in buffer */ -} __attribute__((packed)) xfs_inode_log_format_32_t; - -typedef struct xfs_inode_log_format_64 { - __uint16_t ilf_type; /* inode log item type */ - __uint16_t ilf_size; /* size of this item */ - __uint32_t ilf_fields; /* flags for fields logged */ - __uint16_t ilf_asize; /* size of attr d/ext/root */ - __uint16_t ilf_dsize; /* size of data/ext/root */ - __uint32_t ilf_pad; /* pad for 64 bit boundary */ - __uint64_t ilf_ino; /* inode number */ - union { - __uint32_t ilfu_rdev; /* rdev value for dev inode*/ - uuid_t ilfu_uuid; /* mount point value */ - } ilf_u; - __int64_t ilf_blkno; /* blkno of inode buffer */ - __int32_t ilf_len; /* len of inode buffer */ - __int32_t ilf_boffset; /* off of inode in buffer */ -} xfs_inode_log_format_64_t; +} __attribute__((packed)); /* diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 7034e17535de..3354140de07e 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -247,6 +247,8 @@ xfs_set_mode(struct inode *inode, umode_t mode) int xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) { + umode_t mode; + bool set_mode = false; int error = 0; if (!acl) @@ -257,16 +259,24 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) return error; if (type == ACL_TYPE_ACCESS) { - umode_t mode; - error = posix_acl_update_mode(inode, &mode, &acl); if (error) return error; - error = xfs_set_mode(inode, mode); - if (error) - return error; + set_mode = true; } set_acl: - return __xfs_set_acl(inode, acl, type); + error = __xfs_set_acl(inode, acl, type); + if (error) + return error; + + /* + * We set the mode after successfully updating the ACL xattr because the + * xattr update can fail at ENOSPC and we don't want to change the mode + * if the ACL update hasn't been applied. + */ + if (set_mode) + error = xfs_set_mode(inode, mode); + + return error; } diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index d23889e0bedc..d31cd1ebd8e9 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -335,7 +335,8 @@ xfs_end_io( error = xfs_reflink_end_cow(ip, offset, size); break; case XFS_IO_UNWRITTEN: - error = xfs_iomap_write_unwritten(ip, offset, size); + /* writeback should never update isize */ + error = xfs_iomap_write_unwritten(ip, offset, size, false); break; default: ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans); @@ -437,6 +438,19 @@ xfs_imap_valid( { offset >>= inode->i_blkbits; + /* + * We have to make sure the cached mapping is within EOF to protect + * against eofblocks trimming on file release leaving us with a stale + * mapping. Otherwise, a page for a subsequent file extending buffered + * write could get picked up by this writeback cycle and written to the + * wrong blocks. + * + * Note that what we really want here is a generic mapping invalidation + * mechanism to protect us from arbitrary extent modifying contexts, not + * just eofblocks. + */ + xfs_trim_extent_eof(imap, XFS_I(inode)); + return offset >= imap->br_startoff && offset < imap->br_startoff + imap->br_blockcount; } @@ -725,6 +739,14 @@ xfs_vm_invalidatepage( { trace_xfs_invalidatepage(page->mapping->host, page, offset, length); + + /* + * If we are invalidating the entire page, clear the dirty state from it + * so that we can check for attempts to release dirty cached pages in + * xfs_vm_releasepage(). + */ + if (offset == 0 && length >= PAGE_SIZE) + cancel_dirty_page(page); block_invalidatepage(page, offset, length); } @@ -1180,25 +1202,27 @@ xfs_vm_releasepage( * mm accommodates an old ext3 case where clean pages might not have had * the dirty bit cleared. Thus, it can send actual dirty pages to * ->releasepage() via shrink_active_list(). Conversely, - * block_invalidatepage() can send pages that are still marked dirty - * but otherwise have invalidated buffers. + * block_invalidatepage() can send pages that are still marked dirty but + * otherwise have invalidated buffers. * * We want to release the latter to avoid unnecessary buildup of the - * LRU, skip the former and warn if we've left any lingering - * delalloc/unwritten buffers on clean pages. Skip pages with delalloc - * or unwritten buffers and warn if the page is not dirty. Otherwise - * try to release the buffers. + * LRU, so xfs_vm_invalidatepage() clears the page dirty flag on pages + * that are entirely invalidated and need to be released. Hence the + * only time we should get dirty pages here is through + * shrink_active_list() and so we can simply skip those now. + * + * warn if we've left any lingering delalloc/unwritten buffers on clean + * or invalidated pages we are about to release. */ + if (PageDirty(page)) + return 0; + xfs_count_page_state(page, &delalloc, &unwritten); - if (delalloc) { - WARN_ON_ONCE(!PageDirty(page)); + if (WARN_ON_ONCE(delalloc)) return 0; - } - if (unwritten) { - WARN_ON_ONCE(!PageDirty(page)); + if (WARN_ON_ONCE(unwritten)) return 0; - } return try_to_free_buffers(page); } @@ -1532,6 +1556,21 @@ xfs_end_io_direct_write( return 0; } + if (flags & XFS_DIO_FLAG_COW) + error = xfs_reflink_end_cow(ip, offset, size); + + /* + * Unwritten conversion updates the in-core isize after extent + * conversion but before updating the on-disk size. Updating isize any + * earlier allows a racing dio read to find unwritten extents before + * they are converted. + */ + if (flags & XFS_DIO_FLAG_UNWRITTEN) { + trace_xfs_end_io_direct_write_unwritten(ip, offset, size); + + return xfs_iomap_write_unwritten(ip, offset, size, true); + } + /* * We need to update the in-core inode size here so that we don't end up * with the on-disk inode size being outside the in-core inode size. We @@ -1548,13 +1587,6 @@ xfs_end_io_direct_write( i_size_write(inode, offset + size); spin_unlock(&ip->i_flags_lock); - if (flags & XFS_DIO_FLAG_COW) - error = xfs_reflink_end_cow(ip, offset, size); - if (flags & XFS_DIO_FLAG_UNWRITTEN) { - trace_xfs_end_io_direct_write_unwritten(ip, offset, size); - - error = xfs_iomap_write_unwritten(ip, offset, size); - } if (flags & XFS_DIO_FLAG_APPEND) { trace_xfs_end_io_direct_write_append(ip, offset, size); diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index be0b79d8900f..c6643004e583 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -302,6 +302,8 @@ xfs_attr3_node_inactive( &bp, XFS_ATTR_FORK); if (error) return error; + node = bp->b_addr; + btree = dp->d_ops->node_tree_p(node); child_fsb = be32_to_cpu(btree[i + 1].before); xfs_trans_brelse(*trans, bp); } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 5ffefac081f7..cb62871fb91d 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -84,6 +84,7 @@ xfs_zero_extent( GFP_NOFS, true); } +#ifdef CONFIG_XFS_RT int xfs_bmap_rtalloc( struct xfs_bmalloca *ap) /* bmap alloc argument struct */ @@ -195,6 +196,7 @@ xfs_bmap_rtalloc( } return 0; } +#endif /* CONFIG_XFS_RT */ /* * Check if the endoff is outside the last extent. If so the caller will grow @@ -1445,7 +1447,19 @@ xfs_shift_file_space( return error; /* - * The extent shiting code works on extent granularity. So, if + * Clean out anything hanging around in the cow fork now that + * we've flushed all the dirty data out to disk to avoid having + * CoW extents at the wrong offsets. + */ + if (xfs_is_reflink_inode(ip)) { + error = xfs_reflink_cancel_cow_range(ip, offset, NULLFILEOFF, + true); + if (error) + return error; + } + + /* + * The extent shifting code works on extent granularity. So, if * stop_fsb is not the starting block of extent, we need to split * the extent at stop_fsb. */ @@ -2094,11 +2108,31 @@ xfs_swap_extents( ip->i_d.di_flags2 |= tip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK; tip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; tip->i_d.di_flags2 |= f & XFS_DIFLAG2_REFLINK; + } + + /* Swap the cow forks. */ + if (xfs_sb_version_hasreflink(&mp->m_sb)) { + xfs_extnum_t extnum; + + ASSERT(ip->i_cformat == XFS_DINODE_FMT_EXTENTS); + ASSERT(tip->i_cformat == XFS_DINODE_FMT_EXTENTS); + + extnum = ip->i_cnextents; + ip->i_cnextents = tip->i_cnextents; + tip->i_cnextents = extnum; + cowfp = ip->i_cowfp; ip->i_cowfp = tip->i_cowfp; tip->i_cowfp = cowfp; - xfs_inode_set_cowblocks_tag(ip); - xfs_inode_set_cowblocks_tag(tip); + + if (ip->i_cowfp && ip->i_cnextents) + xfs_inode_set_cowblocks_tag(ip); + else + xfs_inode_clear_cowblocks_tag(ip); + if (tip->i_cowfp && tip->i_cnextents) + xfs_inode_set_cowblocks_tag(tip); + else + xfs_inode_clear_cowblocks_tag(tip); } xfs_trans_log_inode(tp, ip, src_log_flags); diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index f1005393785c..ce330f06563e 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -28,7 +28,20 @@ struct xfs_mount; struct xfs_trans; struct xfs_bmalloca; +#ifdef CONFIG_XFS_RT int xfs_bmap_rtalloc(struct xfs_bmalloca *ap); +#else /* !CONFIG_XFS_RT */ +/* + * Attempts to allocate RT extents when RT is disable indicates corruption and + * should trigger a shutdown. + */ +static inline int +xfs_bmap_rtalloc(struct xfs_bmalloca *ap) +{ + return -EFSCORRUPTED; +} +#endif /* CONFIG_XFS_RT */ + int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff, int whichfork, int *eof); int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index ed7ee4e8af73..bcf72970ca6b 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -167,7 +167,7 @@ xfs_verifier_error( { struct xfs_mount *mp = bp->b_target->bt_mount; - xfs_alert(mp, "Metadata %s detected at %pF, %s block 0x%llx", + xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx", bp->b_error == -EFSBADCRC ? "CRC error" : "corruption", __return_address, bp->b_ops->name, bp->b_bn); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 586b398f268d..362c6b4c1186 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -92,7 +92,7 @@ xfs_zero_range( xfs_off_t count, bool *did_zero) { - return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops); + return iomap_zero_range(VFS_I(ip), pos, count, did_zero, &xfs_iomap_ops); } int diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 9e795ab08a53..fe9a9a183b2d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1632,10 +1632,12 @@ xfs_itruncate_extents( goto out; /* - * Clear the reflink flag if we truncated everything. + * Clear the reflink flag if there are no data fork blocks and + * there are no extents staged in the cow fork. */ - if (ip->i_d.di_nblocks == 0 && xfs_is_reflink_inode(ip)) { - ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; + if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) { + if (ip->i_d.di_nblocks == 0) + ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; xfs_inode_clear_cowblocks_tag(ip); } diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 94915747042c..d0a3c4bd2c38 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -364,6 +364,9 @@ xfs_inode_to_log_dinode( to->di_dmstate = from->di_dmstate; to->di_flags = from->di_flags; + /* log a dummy value to ensure log structure is fully initialised */ + to->di_next_unlinked = NULLAGINO; + if (from->di_version == 3) { to->di_changecount = inode->i_version; to->di_crtime.t_sec = from->di_crtime.t_sec; @@ -404,6 +407,11 @@ xfs_inode_item_format_core( * the second with the on-disk inode structure, and a possible third and/or * fourth with the inode data/extents/b-tree root and inode attributes * data/extents/b-tree root. + * + * Note: Always use the 64 bit inode log format structure so we don't + * leave an uninitialised hole in the format item on 64 bit systems. Log + * recovery on 32 bit systems handles this just fine, so there's no reason + * for not using an initialising the properly padded structure all the time. */ STATIC void xfs_inode_item_format( @@ -412,8 +420,8 @@ xfs_inode_item_format( { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; - struct xfs_inode_log_format *ilf; struct xfs_log_iovec *vecp = NULL; + struct xfs_inode_log_format *ilf; ASSERT(ip->i_d.di_version > 1); @@ -425,7 +433,17 @@ xfs_inode_item_format( ilf->ilf_boffset = ip->i_imap.im_boffset; ilf->ilf_fields = XFS_ILOG_CORE; ilf->ilf_size = 2; /* format + core */ - xlog_finish_iovec(lv, vecp, sizeof(struct xfs_inode_log_format)); + + /* + * make sure we don't leak uninitialised data into the log in the case + * when we don't log every field in the inode. + */ + ilf->ilf_dsize = 0; + ilf->ilf_asize = 0; + ilf->ilf_pad = 0; + memset(&ilf->ilf_u.ilfu_uuid, 0, sizeof(ilf->ilf_u.ilfu_uuid)); + + xlog_finish_iovec(lv, vecp, sizeof(*ilf)); xfs_inode_item_format_core(ip, lv, &vecp); xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp); @@ -745,7 +763,7 @@ xfs_iflush_done( */ iip = INODE_ITEM(blip); if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) || - lip->li_flags & XFS_LI_FAILED) + (blip->li_flags & XFS_LI_FAILED)) need_ail++; blip = next; @@ -855,48 +873,30 @@ xfs_istale_done( } /* - * convert an xfs_inode_log_format struct from either 32 or 64 bit versions - * (which can have different field alignments) to the native version + * convert an xfs_inode_log_format struct from the old 32 bit version + * (which can have different field alignments) to the native 64 bit version */ int xfs_inode_item_format_convert( - xfs_log_iovec_t *buf, - xfs_inode_log_format_t *in_f) + struct xfs_log_iovec *buf, + struct xfs_inode_log_format *in_f) { - if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) { - xfs_inode_log_format_32_t *in_f32 = buf->i_addr; + struct xfs_inode_log_format_32 *in_f32 = buf->i_addr; - in_f->ilf_type = in_f32->ilf_type; - in_f->ilf_size = in_f32->ilf_size; - in_f->ilf_fields = in_f32->ilf_fields; - in_f->ilf_asize = in_f32->ilf_asize; - in_f->ilf_dsize = in_f32->ilf_dsize; - in_f->ilf_ino = in_f32->ilf_ino; - /* copy biggest field of ilf_u */ - memcpy(in_f->ilf_u.ilfu_uuid.__u_bits, - in_f32->ilf_u.ilfu_uuid.__u_bits, - sizeof(uuid_t)); - in_f->ilf_blkno = in_f32->ilf_blkno; - in_f->ilf_len = in_f32->ilf_len; - in_f->ilf_boffset = in_f32->ilf_boffset; - return 0; - } else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)){ - xfs_inode_log_format_64_t *in_f64 = buf->i_addr; + if (buf->i_len != sizeof(*in_f32)) + return -EFSCORRUPTED; - in_f->ilf_type = in_f64->ilf_type; - in_f->ilf_size = in_f64->ilf_size; - in_f->ilf_fields = in_f64->ilf_fields; - in_f->ilf_asize = in_f64->ilf_asize; - in_f->ilf_dsize = in_f64->ilf_dsize; - in_f->ilf_ino = in_f64->ilf_ino; - /* copy biggest field of ilf_u */ - memcpy(in_f->ilf_u.ilfu_uuid.__u_bits, - in_f64->ilf_u.ilfu_uuid.__u_bits, - sizeof(uuid_t)); - in_f->ilf_blkno = in_f64->ilf_blkno; - in_f->ilf_len = in_f64->ilf_len; - in_f->ilf_boffset = in_f64->ilf_boffset; - return 0; - } - return -EFSCORRUPTED; + in_f->ilf_type = in_f32->ilf_type; + in_f->ilf_size = in_f32->ilf_size; + in_f->ilf_fields = in_f32->ilf_fields; + in_f->ilf_asize = in_f32->ilf_asize; + in_f->ilf_dsize = in_f32->ilf_dsize; + in_f->ilf_ino = in_f32->ilf_ino; + /* copy biggest field of ilf_u */ + memcpy(in_f->ilf_u.ilfu_uuid.__u_bits, + in_f32->ilf_u.ilfu_uuid.__u_bits, sizeof(uuid_t)); + in_f->ilf_blkno = in_f32->ilf_blkno; + in_f->ilf_len = in_f32->ilf_len; + in_f->ilf_boffset = in_f32->ilf_boffset; + return 0; } diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index bce2e260f55e..6c95812120eb 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1085,6 +1085,7 @@ xfs_ioctl_setattr_dax_invalidate( int *join_flags) { struct inode *inode = VFS_I(ip); + struct super_block *sb = inode->i_sb; int error; *join_flags = 0; @@ -1097,7 +1098,7 @@ xfs_ioctl_setattr_dax_invalidate( if (fa->fsx_xflags & FS_XFLAG_DAX) { if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) return -EINVAL; - if (ip->i_mount->m_sb.sb_blocksize != PAGE_SIZE) + if (bdev_dax_supported(sb, sb->s_blocksize) < 0) return -EINVAL; } diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 65740d1cbd92..f286f63c430c 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -836,7 +836,8 @@ int xfs_iomap_write_unwritten( xfs_inode_t *ip, xfs_off_t offset, - xfs_off_t count) + xfs_off_t count, + bool update_isize) { xfs_mount_t *mp = ip->i_mount; xfs_fileoff_t offset_fsb; @@ -847,6 +848,7 @@ xfs_iomap_write_unwritten( xfs_trans_t *tp; xfs_bmbt_irec_t imap; struct xfs_defer_ops dfops; + struct inode *inode = VFS_I(ip); xfs_fsize_t i_size; uint resblks; int error; @@ -906,7 +908,8 @@ xfs_iomap_write_unwritten( i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb); if (i_size > offset + count) i_size = offset + count; - + if (update_isize && i_size > i_size_read(inode)) + i_size_write(inode, i_size); i_size = xfs_new_eof(ip, i_size); if (i_size) { ip->i_d.di_size = i_size; diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 6d45cf01fcff..d71703af5c76 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -27,7 +27,7 @@ int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, struct xfs_bmbt_irec *, int); int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t, struct xfs_bmbt_irec *); -int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t); +int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool); void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *, struct xfs_bmbt_irec *); diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index d8a77dbf4e3a..26d67ce3c18d 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -361,7 +361,6 @@ xfs_bulkstat( xfs_agino_t agino; /* inode # in allocation group */ xfs_agnumber_t agno; /* allocation group number */ xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ - size_t irbsize; /* size of irec buffer in bytes */ xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ int nirbuf; /* size of irbuf */ int ubcount; /* size of user's buffer */ @@ -388,11 +387,10 @@ xfs_bulkstat( *ubcountp = 0; *done = 0; - irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); + irbuf = kmem_zalloc_large(PAGE_SIZE * 4, KM_SLEEP); if (!irbuf) return -ENOMEM; - - nirbuf = irbsize / sizeof(*irbuf); + nirbuf = (PAGE_SIZE * 4) / sizeof(*irbuf); /* * Loop over the allocation groups, starting from the last diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h index 0c381d71b242..0492436a053f 100644 --- a/fs/xfs/xfs_ondisk.h +++ b/fs/xfs/xfs_ondisk.h @@ -134,7 +134,7 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(struct xfs_icreate_log, 28); XFS_CHECK_STRUCT_SIZE(struct xfs_ictimestamp, 8); XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_32, 52); - XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_64, 56); + XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format, 56); XFS_CHECK_STRUCT_SIZE(struct xfs_qoff_logformat, 20); XFS_CHECK_STRUCT_SIZE(struct xfs_trans_header, 16); } diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 93a7aafa56d6..cecd37569ddb 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -279,7 +279,7 @@ xfs_fs_commit_blocks( (end - 1) >> PAGE_SHIFT); WARN_ON_ONCE(error); - error = xfs_iomap_write_unwritten(ip, start, length); + error = xfs_iomap_write_unwritten(ip, start, length, false); if (error) goto out_drop_iolock; } diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 0015c19c7455..17d3c964a2a2 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -767,7 +767,13 @@ xfs_reflink_end_cow( /* If there is a hole at end_fsb - 1 go to the previous extent */ if (eof || got.br_startoff > end_fsb) { - ASSERT(idx > 0); + /* + * In case of racing, overlapping AIO writes no COW extents + * might be left by the time I/O completes for the loser of + * the race. In that case we are done. + */ + if (idx <= 0) + goto out_cancel; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, --idx), &got); } @@ -841,6 +847,7 @@ next_extent: out_defer: xfs_defer_cancel(&dfops); +out_cancel: xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); out: diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 0504ef8f3aa3..976f8ac26665 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -115,15 +115,35 @@ do { \ (__ret); \ }) -#define this_cpu_generic_read(pcp) \ +#define __this_cpu_generic_read_nopreempt(pcp) \ ({ \ typeof(pcp) __ret; \ preempt_disable_notrace(); \ - __ret = raw_cpu_generic_read(pcp); \ + __ret = READ_ONCE(*raw_cpu_ptr(&(pcp))); \ preempt_enable_notrace(); \ __ret; \ }) +#define __this_cpu_generic_read_noirq(pcp) \ +({ \ + typeof(pcp) __ret; \ + unsigned long __flags; \ + raw_local_irq_save(__flags); \ + __ret = raw_cpu_generic_read(pcp); \ + raw_local_irq_restore(__flags); \ + __ret; \ +}) + +#define this_cpu_generic_read(pcp) \ +({ \ + typeof(pcp) __ret; \ + if (__native_word(pcp)) \ + __ret = __this_cpu_generic_read_nopreempt(pcp); \ + else \ + __ret = __this_cpu_generic_read_noirq(pcp); \ + __ret; \ +}) + #define this_cpu_generic_to_op(pcp, val, op) \ do { \ unsigned long __flags; \ diff --git a/include/keys/user-type.h b/include/keys/user-type.h index c56fef40f53e..e098cbe27db5 100644 --- a/include/keys/user-type.h +++ b/include/keys/user-type.h @@ -48,9 +48,14 @@ extern void user_describe(const struct key *user, struct seq_file *m); extern long user_read(const struct key *key, char __user *buffer, size_t buflen); -static inline const struct user_key_payload *user_key_payload(const struct key *key) +static inline const struct user_key_payload *user_key_payload_rcu(const struct key *key) { - return (struct user_key_payload *)rcu_dereference_key(key); + return (struct user_key_payload *)dereference_key_rcu(key); +} + +static inline struct user_key_payload *user_key_payload_locked(const struct key *key) +{ + return (struct user_key_payload *)dereference_key_locked((struct key *)key); } #endif /* CONFIG_KEYS */ diff --git a/include/linux/audit.h b/include/linux/audit.h index 9d4443f93db6..2be99b276d29 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -387,6 +387,20 @@ static inline int audit_socketcall(int nargs, unsigned long *args) return __audit_socketcall(nargs, args); return 0; } + +static inline int audit_socketcall_compat(int nargs, u32 *args) +{ + unsigned long a[AUDITSC_ARGS]; + int i; + + if (audit_dummy_context()) + return 0; + + for (i = 0; i < nargs; i++) + a[i] = (unsigned long)args[i]; + return __audit_socketcall(nargs, a); +} + static inline int audit_sockaddr(int len, void *addr) { if (unlikely(!audit_dummy_context())) @@ -513,6 +527,12 @@ static inline int audit_socketcall(int nargs, unsigned long *args) { return 0; } + +static inline int audit_socketcall_compat(int nargs, u32 *args) +{ + return 0; +} + static inline void audit_fd_pair(int fd1, int fd2) { } static inline int audit_sockaddr(int len, void *addr) diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index ebbacd14d450..447a915db25d 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -226,6 +226,7 @@ int generic_write_end(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page *, void *); void page_zero_new_buffers(struct page *page, unsigned from, unsigned to); +void clean_page_buffers(struct page *page); int cont_write_begin(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page **, void **, get_block_t *, loff_t *); diff --git a/include/linux/ccp.h b/include/linux/ccp.h index edc5d04b9632..1cfe5ef3060b 100644 --- a/include/linux/ccp.h +++ b/include/linux/ccp.h @@ -1,7 +1,7 @@ /* * AMD Cryptographic Coprocessor (CCP) driver * - * Copyright (C) 2013,2016 Advanced Micro Devices, Inc. + * Copyright (C) 2013,2017 Advanced Micro Devices, Inc. * * Author: Tom Lendacky * Author: Gary R Hook @@ -222,6 +222,7 @@ enum ccp_xts_aes_unit_size { * AES operation the new IV overwrites the old IV. */ struct ccp_xts_aes_engine { + enum ccp_aes_type type; enum ccp_aes_action action; enum ccp_xts_aes_unit_size unit_size; diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 928e5ca0caee..1659f98984fc 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -66,18 +66,22 @@ /* * Force always-inline if the user requests it so via the .config, - * or if gcc is too old: + * or if gcc is too old. + * GCC does not warn about unused static inline functions for + * -Wunused-function. This turns out to avoid the need for complex #ifdef + * directives. Suppress the warning in clang as well by using "unused" + * function attribute, which is redundant but not harmful for gcc. */ #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \ !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4) -#define inline inline __attribute__((always_inline)) notrace -#define __inline__ __inline__ __attribute__((always_inline)) notrace -#define __inline __inline __attribute__((always_inline)) notrace +#define inline inline __attribute__((always_inline,unused)) notrace +#define __inline__ __inline__ __attribute__((always_inline,unused)) notrace +#define __inline __inline __attribute__((always_inline,unused)) notrace #else /* A lot of inline functions can cause havoc with function tracing */ -#define inline inline notrace -#define __inline__ __inline__ notrace -#define __inline __inline notrace +#define inline inline __attribute__((unused)) notrace +#define __inline__ __inline__ __attribute__((unused)) notrace +#define __inline __inline __attribute__((unused)) notrace #endif #define __always_inline inline __attribute__((always_inline)) diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index cc57986d3bfe..0b985998643a 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -120,6 +120,14 @@ struct cpufreq_policy { bool fast_switch_possible; bool fast_switch_enabled; + /* + * Preferred average time interval between consecutive invocations of + * the driver to set the frequency for this policy. To be set by the + * scaling driver (0, which is the default, means no preference). + */ + unsigned int up_transition_delay_us; + unsigned int down_transition_delay_us; + /* Cached frequency lookup from cpufreq_driver_resolve_freq. */ unsigned int cached_target_freq; int cached_resolved_idx; diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index cd32a49ae81e..d807fa9b2051 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -55,7 +55,9 @@ static inline void cpuset_dec(void) extern int cpuset_init(void); extern void cpuset_init_smp(void); +extern void cpuset_force_rebuild(void); extern void cpuset_update_active_cpus(bool cpu_online); +extern void cpuset_wait_for_hotplug(void); extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); extern void cpuset_cpus_allowed_fallback(struct task_struct *p); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); @@ -168,11 +170,15 @@ static inline bool cpusets_enabled(void) { return false; } static inline int cpuset_init(void) { return 0; } static inline void cpuset_init_smp(void) {} +static inline void cpuset_force_rebuild(void) { } + static inline void cpuset_update_active_cpus(bool cpu_online) { partition_sched_domains(1, NULL, NULL); } +static inline void cpuset_wait_for_hotplug(void) { } + static inline void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask) { diff --git a/include/linux/efi.h b/include/linux/efi.h index cba7177cbec7..5e204a5ab8c4 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1427,7 +1427,7 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, unsigned long *load_addr, unsigned long *load_size); -efi_status_t efi_parse_options(char *cmdline); +efi_status_t efi_parse_options(char const *cmdline); efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg, struct screen_info *si, efi_guid_t *proto, diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index e46e7d10312b..2a0c453d7235 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -32,9 +32,15 @@ /* 0, 1(node nid), 2(meta nid) are reserved node id */ #define F2FS_RESERVED_NODE_NUM 3 -#define F2FS_ROOT_INO(sbi) (sbi->root_ino_num) -#define F2FS_NODE_INO(sbi) (sbi->node_ino_num) -#define F2FS_META_INO(sbi) (sbi->meta_ino_num) +#define F2FS_ROOT_INO(sbi) ((sbi)->root_ino_num) +#define F2FS_NODE_INO(sbi) ((sbi)->node_ino_num) +#define F2FS_META_INO(sbi) ((sbi)->meta_ino_num) + +#define F2FS_IO_SIZE(sbi) (1 << (sbi)->write_io_size_bits) /* Blocks */ +#define F2FS_IO_SIZE_KB(sbi) (1 << ((sbi)->write_io_size_bits + 2)) /* KB */ +#define F2FS_IO_SIZE_BYTES(sbi) (1 << ((sbi)->write_io_size_bits + 12)) /* B */ +#define F2FS_IO_SIZE_BITS(sbi) ((sbi)->write_io_size_bits) /* power of 2 */ +#define F2FS_IO_SIZE_MASK(sbi) (F2FS_IO_SIZE(sbi) - 1) /* This flag is used by node and meta inodes, and by recovery */ #define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) @@ -52,10 +58,17 @@ #define VERSION_LEN 256 #define MAX_VOLUME_NAME 512 +#define MAX_PATH_LEN 64 +#define MAX_DEVICES 8 /* * For superblock */ +struct f2fs_device { + __u8 path[MAX_PATH_LEN]; + __le32 total_segments; +} __packed; + struct f2fs_super_block { __le32 magic; /* Magic Number */ __le16 major_ver; /* Major Version */ @@ -94,12 +107,15 @@ struct f2fs_super_block { __le32 feature; /* defined features */ __u8 encryption_level; /* versioning level for encryption */ __u8 encrypt_pw_salt[16]; /* Salt used for string2key algorithm */ - __u8 reserved[871]; /* valid reserved region */ + struct f2fs_device devs[MAX_DEVICES]; /* device list */ + __u8 reserved[327]; /* valid reserved region */ } __packed; /* * For checkpoint */ +#define CP_TRIMMED_FLAG 0x00000100 +#define CP_NAT_BITS_FLAG 0x00000080 #define CP_CRC_RECOVERY_FLAG 0x00000040 #define CP_FASTBOOT_FLAG 0x00000020 #define CP_FSCK_FLAG 0x00000010 @@ -146,7 +162,7 @@ struct f2fs_checkpoint { */ #define F2FS_ORPHANS_PER_BLOCK 1020 -#define GET_ORPHAN_BLOCKS(n) ((n + F2FS_ORPHANS_PER_BLOCK - 1) / \ +#define GET_ORPHAN_BLOCKS(n) (((n) + F2FS_ORPHANS_PER_BLOCK - 1) / \ F2FS_ORPHANS_PER_BLOCK) struct f2fs_orphan_block { @@ -170,6 +186,8 @@ struct f2fs_extent { #define F2FS_NAME_LEN 255 #define F2FS_INLINE_XATTR_ADDRS 50 /* 200 bytes for inline xattrs */ #define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ +#define CUR_ADDRS_PER_INODE(inode) (DEF_ADDRS_PER_INODE - \ + get_extra_isize(inode)) #define DEF_NIDS_PER_INODE 5 /* Node IDs in an Inode */ #define ADDRS_PER_INODE(inode) addrs_per_inode(inode) #define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ @@ -189,9 +207,7 @@ struct f2fs_extent { #define F2FS_INLINE_DENTRY 0x04 /* file inline dentry flag */ #define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */ #define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries */ - -#define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \ - F2FS_INLINE_XATTR_ADDRS - 1)) +#define F2FS_EXTRA_ATTR 0x20 /* file having extra attribute */ struct f2fs_inode { __le16 i_mode; /* file mode */ @@ -219,8 +235,16 @@ struct f2fs_inode { struct f2fs_extent i_ext; /* caching a largest extent */ - __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */ - + union { + struct { + __le16 i_extra_isize; /* extra inode attribute size */ + __le16 i_padding; /* padding */ + __le32 i_projid; /* project id */ + __le32 i_inode_checksum;/* inode meta checksum */ + __le32 i_extra_end[0]; /* for attribute size calculation */ + }; + __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */ + }; __le32 i_nid[DEF_NIDS_PER_INODE]; /* direct(2), indirect(2), double_indirect(1) node id */ } __packed; @@ -264,6 +288,7 @@ struct f2fs_node { * For NAT entries */ #define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry)) +#define NAT_ENTRY_BITMAP_SIZE ((NAT_ENTRY_PER_BLOCK + 7) / 8) struct f2fs_nat_entry { __u8 version; /* latest version of cached nat entry */ @@ -439,7 +464,7 @@ typedef __le32 f2fs_hash_t; #define F2FS_SLOT_LEN 8 #define F2FS_SLOT_LEN_BITS 3 -#define GET_DENTRY_SLOTS(x) ((x + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS) +#define GET_DENTRY_SLOTS(x) (((x) + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS) /* MAX level for dir lookup */ #define MAX_DIR_HASH_DEPTH 63 @@ -448,7 +473,7 @@ typedef __le32 f2fs_hash_t; #define MAX_DIR_BUCKETS (1 << ((MAX_DIR_HASH_DEPTH / 2) - 1)) /* - * space utilization of regular dentry and inline dentry + * space utilization of regular dentry and inline dentry (w/o extra reservation) * regular dentry inline dentry * bitmap 1 * 27 = 27 1 * 23 = 23 * reserved 1 * 3 = 3 1 * 7 = 7 @@ -484,24 +509,6 @@ struct f2fs_dentry_block { __u8 filename[NR_DENTRY_IN_BLOCK][F2FS_SLOT_LEN]; } __packed; -/* for inline dir */ -#define NR_INLINE_DENTRY (MAX_INLINE_DATA * BITS_PER_BYTE / \ - ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ - BITS_PER_BYTE + 1)) -#define INLINE_DENTRY_BITMAP_SIZE ((NR_INLINE_DENTRY + \ - BITS_PER_BYTE - 1) / BITS_PER_BYTE) -#define INLINE_RESERVED_SIZE (MAX_INLINE_DATA - \ - ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ - NR_INLINE_DENTRY + INLINE_DENTRY_BITMAP_SIZE)) - -/* inline directory entry structure */ -struct f2fs_inline_dentry { - __u8 dentry_bitmap[INLINE_DENTRY_BITMAP_SIZE]; - __u8 reserved[INLINE_RESERVED_SIZE]; - struct f2fs_dir_entry dentry[NR_INLINE_DENTRY]; - __u8 filename[NR_INLINE_DENTRY][F2FS_SLOT_LEN]; -} __packed; - /* file types used in inode_info->flags */ enum { F2FS_FT_UNKNOWN, @@ -517,4 +524,6 @@ enum { #define S_SHIFT 12 +#define F2FS_DEF_PROJID 0 /* default project ID */ + #endif /* _LINUX_F2FS_FS_H */ diff --git a/include/linux/fscrypt_common.h b/include/linux/fscrypt_common.h new file mode 100644 index 000000000000..4022c61f7e9b --- /dev/null +++ b/include/linux/fscrypt_common.h @@ -0,0 +1,138 @@ +/* + * fscrypt_common.h: common declarations for per-file encryption + * + * Copyright (C) 2015, Google, Inc. + * + * Written by Michael Halcrow, 2015. + * Modified by Jaegeuk Kim, 2015. + */ + +#ifndef _LINUX_FSCRYPT_COMMON_H +#define _LINUX_FSCRYPT_COMMON_H + +#include +#include +#include +#include +#include +#include +#include + +#define FS_CRYPTO_BLOCK_SIZE 16 + +struct fscrypt_info; + +struct fscrypt_ctx { + union { + struct { + struct page *bounce_page; /* Ciphertext page */ + struct page *control_page; /* Original page */ + } w; + struct { + struct bio *bio; + struct work_struct work; + } r; + struct list_head free_list; /* Free list */ + }; + u8 flags; /* Flags */ +}; + +/** + * For encrypted symlinks, the ciphertext length is stored at the beginning + * of the string in little-endian format. + */ +struct fscrypt_symlink_data { + __le16 len; + char encrypted_path[1]; +} __packed; + +struct fscrypt_str { + unsigned char *name; + u32 len; +}; + +struct fscrypt_name { + const struct qstr *usr_fname; + struct fscrypt_str disk_name; + u32 hash; + u32 minor_hash; + struct fscrypt_str crypto_buf; +}; + +#define FSTR_INIT(n, l) { .name = n, .len = l } +#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len) +#define fname_name(p) ((p)->disk_name.name) +#define fname_len(p) ((p)->disk_name.len) + +/* + * fscrypt superblock flags + */ +#define FS_CFLG_OWN_PAGES (1U << 1) + +/* + * crypto opertions for filesystems + */ +struct fscrypt_operations { + unsigned int flags; + const char *key_prefix; + int (*get_context)(struct inode *, void *, size_t); + int (*set_context)(struct inode *, const void *, size_t, void *); + int (*dummy_context)(struct inode *); + bool (*is_encrypted)(struct inode *); + bool (*empty_dir)(struct inode *); + unsigned (*max_namelen)(struct inode *); +}; + +static inline bool fscrypt_dummy_context_enabled(struct inode *inode) +{ + if (inode->i_sb->s_cop->dummy_context && + inode->i_sb->s_cop->dummy_context(inode)) + return true; + return false; +} + +static inline bool fscrypt_valid_enc_modes(u32 contents_mode, + u32 filenames_mode) +{ + if (contents_mode == FS_ENCRYPTION_MODE_AES_128_CBC && + filenames_mode == FS_ENCRYPTION_MODE_AES_128_CTS) + return true; + + if (contents_mode == FS_ENCRYPTION_MODE_AES_256_XTS && + filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS) + return true; + + return false; +} + +static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) +{ + if (str->len == 1 && str->name[0] == '.') + return true; + + if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') + return true; + + return false; +} + +static inline struct page *fscrypt_control_page(struct page *page) +{ +#if IS_ENABLED(CONFIG_FS_ENCRYPTION) + return ((struct fscrypt_ctx *)page_private(page))->w.control_page; +#else + WARN_ON_ONCE(1); + return ERR_PTR(-EINVAL); +#endif +} + +static inline int fscrypt_has_encryption_key(const struct inode *inode) +{ +#if IS_ENABLED(CONFIG_FS_ENCRYPTION) + return (inode->i_crypt_info != NULL); +#else + return 0; +#endif +} + +#endif /* _LINUX_FSCRYPT_COMMON_H */ diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h new file mode 100644 index 000000000000..ec406aed2f2f --- /dev/null +++ b/include/linux/fscrypt_notsupp.h @@ -0,0 +1,177 @@ +/* + * fscrypt_notsupp.h + * + * This stubs out the fscrypt functions for filesystems configured without + * encryption support. + */ + +#ifndef _LINUX_FSCRYPT_NOTSUPP_H +#define _LINUX_FSCRYPT_NOTSUPP_H + +#include + +/* crypto.c */ +static inline struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, + gfp_t gfp_flags) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void fscrypt_release_ctx(struct fscrypt_ctx *ctx) +{ + return; +} + +static inline struct page *fscrypt_encrypt_page(const struct inode *inode, + struct page *page, + unsigned int len, + unsigned int offs, + u64 lblk_num, gfp_t gfp_flags) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline int fscrypt_decrypt_page(const struct inode *inode, + struct page *page, + unsigned int len, unsigned int offs, + u64 lblk_num) +{ + return -EOPNOTSUPP; +} + + +static inline void fscrypt_restore_control_page(struct page *page) +{ + return; +} + +static inline void fscrypt_set_d_op(struct dentry *dentry) +{ + return; +} + +static inline void fscrypt_set_encrypted_dentry(struct dentry *dentry) +{ + return; +} + +/* policy.c */ +static inline int fscrypt_ioctl_set_policy(struct file *filp, + const void __user *arg) +{ + return -EOPNOTSUPP; +} + +static inline int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg) +{ + return -EOPNOTSUPP; +} + +static inline int fscrypt_has_permitted_context(struct inode *parent, + struct inode *child) +{ + return 0; +} + +static inline int fscrypt_inherit_context(struct inode *parent, + struct inode *child, + void *fs_data, bool preload) +{ + return -EOPNOTSUPP; +} + +/* keyinfo.c */ +static inline int fscrypt_get_encryption_info(struct inode *inode) +{ + return -EOPNOTSUPP; +} + +static inline void fscrypt_put_encryption_info(struct inode *inode, + struct fscrypt_info *ci) +{ + return; +} + + /* fname.c */ +static inline int fscrypt_setup_filename(struct inode *dir, + const struct qstr *iname, + int lookup, struct fscrypt_name *fname) +{ + if (dir->i_sb->s_cop->is_encrypted(dir)) + return -EOPNOTSUPP; + + memset(fname, 0, sizeof(struct fscrypt_name)); + fname->usr_fname = iname; + fname->disk_name.name = (unsigned char *)iname->name; + fname->disk_name.len = iname->len; + return 0; +} + +static inline void fscrypt_free_filename(struct fscrypt_name *fname) +{ + return; +} + +static inline u32 fscrypt_fname_encrypted_size(const struct inode *inode, + u32 ilen) +{ + /* never happens */ + WARN_ON(1); + return 0; +} + +static inline int fscrypt_fname_alloc_buffer(const struct inode *inode, + u32 ilen, + struct fscrypt_str *crypto_str) +{ + return -EOPNOTSUPP; +} + +static inline void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str) +{ + return; +} + +static inline int fscrypt_fname_disk_to_usr(struct inode *inode, + u32 hash, u32 minor_hash, + const struct fscrypt_str *iname, + struct fscrypt_str *oname) +{ + return -EOPNOTSUPP; +} + +static inline int fscrypt_fname_usr_to_disk(struct inode *inode, + const struct qstr *iname, + struct fscrypt_str *oname) +{ + return -EOPNOTSUPP; +} + +static inline bool fscrypt_match_name(const struct fscrypt_name *fname, + const u8 *de_name, u32 de_name_len) +{ + /* Encryption support disabled; use standard comparison */ + if (de_name_len != fname->disk_name.len) + return false; + return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); +} + +/* bio.c */ +static inline void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx, + struct bio *bio) +{ + return; +} + +static inline void fscrypt_pullback_bio_page(struct page **page, bool restore) +{ + return; +} + +static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, + sector_t pblk, unsigned int len) +{ + return -EOPNOTSUPP; +} + +#endif /* _LINUX_FSCRYPT_NOTSUPP_H */ diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h new file mode 100644 index 000000000000..32e2fcf13b01 --- /dev/null +++ b/include/linux/fscrypt_supp.h @@ -0,0 +1,145 @@ +/* + * fscrypt_supp.h + * + * This is included by filesystems configured with encryption support. + */ + +#ifndef _LINUX_FSCRYPT_SUPP_H +#define _LINUX_FSCRYPT_SUPP_H + +#include + +/* crypto.c */ +extern struct kmem_cache *fscrypt_info_cachep; +extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t); +extern void fscrypt_release_ctx(struct fscrypt_ctx *); +extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *, + unsigned int, unsigned int, + u64, gfp_t); +extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int, + unsigned int, u64); +extern void fscrypt_restore_control_page(struct page *); + +extern const struct dentry_operations fscrypt_d_ops; + +static inline void fscrypt_set_d_op(struct dentry *dentry) +{ + d_set_d_op(dentry, &fscrypt_d_ops); +} + +static inline void fscrypt_set_encrypted_dentry(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + dentry->d_flags |= DCACHE_ENCRYPTED_WITH_KEY; + spin_unlock(&dentry->d_lock); +} + +/* policy.c */ +extern int fscrypt_ioctl_set_policy(struct file *, const void __user *); +extern int fscrypt_ioctl_get_policy(struct file *, void __user *); +extern int fscrypt_has_permitted_context(struct inode *, struct inode *); +extern int fscrypt_inherit_context(struct inode *, struct inode *, + void *, bool); +/* keyinfo.c */ +extern int fscrypt_get_encryption_info(struct inode *); +extern void fscrypt_put_encryption_info(struct inode *, struct fscrypt_info *); + +/* fname.c */ +extern int fscrypt_setup_filename(struct inode *, const struct qstr *, + int lookup, struct fscrypt_name *); + +static inline void fscrypt_free_filename(struct fscrypt_name *fname) +{ + kfree(fname->crypto_buf.name); +} + +extern u32 fscrypt_fname_encrypted_size(const struct inode *, u32); +extern int fscrypt_fname_alloc_buffer(const struct inode *, u32, + struct fscrypt_str *); +extern void fscrypt_fname_free_buffer(struct fscrypt_str *); +extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32, + const struct fscrypt_str *, struct fscrypt_str *); +extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *, + struct fscrypt_str *); + +#define FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE 32 + +/* Extracts the second-to-last ciphertext block; see explanation below */ +#define FSCRYPT_FNAME_DIGEST(name, len) \ + ((name) + round_down((len) - FS_CRYPTO_BLOCK_SIZE - 1, \ + FS_CRYPTO_BLOCK_SIZE)) + +#define FSCRYPT_FNAME_DIGEST_SIZE FS_CRYPTO_BLOCK_SIZE + +/** + * fscrypt_digested_name - alternate identifier for an on-disk filename + * + * When userspace lists an encrypted directory without access to the key, + * filenames whose ciphertext is longer than FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE + * bytes are shown in this abbreviated form (base64-encoded) rather than as the + * full ciphertext (base64-encoded). This is necessary to allow supporting + * filenames up to NAME_MAX bytes, since base64 encoding expands the length. + * + * To make it possible for filesystems to still find the correct directory entry + * despite not knowing the full on-disk name, we encode any filesystem-specific + * 'hash' and/or 'minor_hash' which the filesystem may need for its lookups, + * followed by the second-to-last ciphertext block of the filename. Due to the + * use of the CBC-CTS encryption mode, the second-to-last ciphertext block + * depends on the full plaintext. (Note that ciphertext stealing causes the + * last two blocks to appear "flipped".) This makes accidental collisions very + * unlikely: just a 1 in 2^128 chance for two filenames to collide even if they + * share the same filesystem-specific hashes. + * + * However, this scheme isn't immune to intentional collisions, which can be + * created by anyone able to create arbitrary plaintext filenames and view them + * without the key. Making the "digest" be a real cryptographic hash like + * SHA-256 over the full ciphertext would prevent this, although it would be + * less efficient and harder to implement, especially since the filesystem would + * need to calculate it for each directory entry examined during a search. + */ +struct fscrypt_digested_name { + u32 hash; + u32 minor_hash; + u8 digest[FSCRYPT_FNAME_DIGEST_SIZE]; +}; + +/** + * fscrypt_match_name() - test whether the given name matches a directory entry + * @fname: the name being searched for + * @de_name: the name from the directory entry + * @de_name_len: the length of @de_name in bytes + * + * Normally @fname->disk_name will be set, and in that case we simply compare + * that to the name stored in the directory entry. The only exception is that + * if we don't have the key for an encrypted directory and a filename in it is + * very long, then we won't have the full disk_name and we'll instead need to + * match against the fscrypt_digested_name. + * + * Return: %true if the name matches, otherwise %false. + */ +static inline bool fscrypt_match_name(const struct fscrypt_name *fname, + const u8 *de_name, u32 de_name_len) +{ + if (unlikely(!fname->disk_name.name)) { + const struct fscrypt_digested_name *n = + (const void *)fname->crypto_buf.name; + if (WARN_ON_ONCE(fname->usr_fname->name[0] != '_')) + return false; + if (de_name_len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE) + return false; + return !memcmp(FSCRYPT_FNAME_DIGEST(de_name, de_name_len), + n->digest, FSCRYPT_FNAME_DIGEST_SIZE); + } + + if (de_name_len != fname->disk_name.len) + return false; + return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); +} + +/* bio.c */ +extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *); +extern void fscrypt_pullback_bio_page(struct page **, bool); +extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t, + unsigned int); + +#endif /* _LINUX_FSCRYPT_SUPP_H */ diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h deleted file mode 100644 index f6dfc2950f76..000000000000 --- a/include/linux/fscrypto.h +++ /dev/null @@ -1,409 +0,0 @@ -/* - * General per-file encryption definition - * - * Copyright (C) 2015, Google, Inc. - * - * Written by Michael Halcrow, 2015. - * Modified by Jaegeuk Kim, 2015. - */ - -#ifndef _LINUX_FSCRYPTO_H -#define _LINUX_FSCRYPTO_H - -#include -#include -#include -#include -#include -#include -#include - -#define FS_KEY_DERIVATION_NONCE_SIZE 16 -#define FS_ENCRYPTION_CONTEXT_FORMAT_V1 1 - -#define FS_POLICY_FLAGS_PAD_4 0x00 -#define FS_POLICY_FLAGS_PAD_8 0x01 -#define FS_POLICY_FLAGS_PAD_16 0x02 -#define FS_POLICY_FLAGS_PAD_32 0x03 -#define FS_POLICY_FLAGS_PAD_MASK 0x03 -#define FS_POLICY_FLAGS_VALID 0x03 - -/* Encryption algorithms */ -#define FS_ENCRYPTION_MODE_INVALID 0 -#define FS_ENCRYPTION_MODE_AES_256_XTS 1 -#define FS_ENCRYPTION_MODE_AES_256_GCM 2 -#define FS_ENCRYPTION_MODE_AES_256_CBC 3 -#define FS_ENCRYPTION_MODE_AES_256_CTS 4 - -/** - * Encryption context for inode - * - * Protector format: - * 1 byte: Protector format (1 = this version) - * 1 byte: File contents encryption mode - * 1 byte: File names encryption mode - * 1 byte: Flags - * 8 bytes: Master Key descriptor - * 16 bytes: Encryption Key derivation nonce - */ -struct fscrypt_context { - u8 format; - u8 contents_encryption_mode; - u8 filenames_encryption_mode; - u8 flags; - u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE]; - u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE]; -} __packed; - -/* Encryption parameters */ -#define FS_XTS_TWEAK_SIZE 16 -#define FS_AES_128_ECB_KEY_SIZE 16 -#define FS_AES_256_GCM_KEY_SIZE 32 -#define FS_AES_256_CBC_KEY_SIZE 32 -#define FS_AES_256_CTS_KEY_SIZE 32 -#define FS_AES_256_XTS_KEY_SIZE 64 -#define FS_MAX_KEY_SIZE 64 - -#define FS_KEY_DESC_PREFIX "fscrypt:" -#define FS_KEY_DESC_PREFIX_SIZE 8 - -/* This is passed in from userspace into the kernel keyring */ -struct fscrypt_key { - u32 mode; - u8 raw[FS_MAX_KEY_SIZE]; - u32 size; -} __packed; - -struct fscrypt_info { - u8 ci_data_mode; - u8 ci_filename_mode; - u8 ci_flags; - struct crypto_skcipher *ci_ctfm; - u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE]; -}; - -#define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 -#define FS_WRITE_PATH_FL 0x00000002 - -struct fscrypt_ctx { - union { - struct { - struct page *bounce_page; /* Ciphertext page */ - struct page *control_page; /* Original page */ - } w; - struct { - struct bio *bio; - struct work_struct work; - } r; - struct list_head free_list; /* Free list */ - }; - u8 flags; /* Flags */ - u8 mode; /* Encryption mode for tfm */ -}; - -struct fscrypt_completion_result { - struct completion completion; - int res; -}; - -#define DECLARE_FS_COMPLETION_RESULT(ecr) \ - struct fscrypt_completion_result ecr = { \ - COMPLETION_INITIALIZER((ecr).completion), 0 } - -#define FS_FNAME_NUM_SCATTER_ENTRIES 4 -#define FS_CRYPTO_BLOCK_SIZE 16 -#define FS_FNAME_CRYPTO_DIGEST_SIZE 32 - -/** - * For encrypted symlinks, the ciphertext length is stored at the beginning - * of the string in little-endian format. - */ -struct fscrypt_symlink_data { - __le16 len; - char encrypted_path[1]; -} __packed; - -/** - * This function is used to calculate the disk space required to - * store a filename of length l in encrypted symlink format. - */ -static inline u32 fscrypt_symlink_data_len(u32 l) -{ - if (l < FS_CRYPTO_BLOCK_SIZE) - l = FS_CRYPTO_BLOCK_SIZE; - return (l + sizeof(struct fscrypt_symlink_data) - 1); -} - -struct fscrypt_str { - unsigned char *name; - u32 len; -}; - -struct fscrypt_name { - const struct qstr *usr_fname; - struct fscrypt_str disk_name; - u32 hash; - u32 minor_hash; - struct fscrypt_str crypto_buf; -}; - -#define FSTR_INIT(n, l) { .name = n, .len = l } -#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len) -#define fname_name(p) ((p)->disk_name.name) -#define fname_len(p) ((p)->disk_name.len) - -/* - * crypto opertions for filesystems - */ -struct fscrypt_operations { - int (*get_context)(struct inode *, void *, size_t); - int (*key_prefix)(struct inode *, u8 **); - int (*prepare_context)(struct inode *); - int (*set_context)(struct inode *, const void *, size_t, void *); - int (*dummy_context)(struct inode *); - bool (*is_encrypted)(struct inode *); - bool (*empty_dir)(struct inode *); - unsigned (*max_namelen)(struct inode *); -}; - -static inline bool fscrypt_dummy_context_enabled(struct inode *inode) -{ - if (inode->i_sb->s_cop->dummy_context && - inode->i_sb->s_cop->dummy_context(inode)) - return true; - return false; -} - -static inline bool fscrypt_valid_contents_enc_mode(u32 mode) -{ - return (mode == FS_ENCRYPTION_MODE_AES_256_XTS); -} - -static inline bool fscrypt_valid_filenames_enc_mode(u32 mode) -{ - return (mode == FS_ENCRYPTION_MODE_AES_256_CTS); -} - -static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) -{ - if (str->len == 1 && str->name[0] == '.') - return true; - - if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') - return true; - - return false; -} - -static inline struct page *fscrypt_control_page(struct page *page) -{ -#if IS_ENABLED(CONFIG_FS_ENCRYPTION) - return ((struct fscrypt_ctx *)page_private(page))->w.control_page; -#else - WARN_ON_ONCE(1); - return ERR_PTR(-EINVAL); -#endif -} - -static inline int fscrypt_has_encryption_key(struct inode *inode) -{ -#if IS_ENABLED(CONFIG_FS_ENCRYPTION) - return (inode->i_crypt_info != NULL); -#else - return 0; -#endif -} - -static inline void fscrypt_set_encrypted_dentry(struct dentry *dentry) -{ -#if IS_ENABLED(CONFIG_FS_ENCRYPTION) - spin_lock(&dentry->d_lock); - dentry->d_flags |= DCACHE_ENCRYPTED_WITH_KEY; - spin_unlock(&dentry->d_lock); -#endif -} - -#if IS_ENABLED(CONFIG_FS_ENCRYPTION) -extern const struct dentry_operations fscrypt_d_ops; -#endif - -static inline void fscrypt_set_d_op(struct dentry *dentry) -{ -#if IS_ENABLED(CONFIG_FS_ENCRYPTION) - d_set_d_op(dentry, &fscrypt_d_ops); -#endif -} - -#if IS_ENABLED(CONFIG_FS_ENCRYPTION) -/* crypto.c */ -extern struct kmem_cache *fscrypt_info_cachep; -int fscrypt_initialize(void); - -extern struct fscrypt_ctx *fscrypt_get_ctx(struct inode *, gfp_t); -extern void fscrypt_release_ctx(struct fscrypt_ctx *); -extern struct page *fscrypt_encrypt_page(struct inode *, struct page *, gfp_t); -extern int fscrypt_decrypt_page(struct page *); -extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *); -extern void fscrypt_pullback_bio_page(struct page **, bool); -extern void fscrypt_restore_control_page(struct page *); -extern int fscrypt_zeroout_range(struct inode *, pgoff_t, sector_t, - unsigned int); -/* policy.c */ -extern int fscrypt_process_policy(struct file *, const struct fscrypt_policy *); -extern int fscrypt_get_policy(struct inode *, struct fscrypt_policy *); -extern int fscrypt_has_permitted_context(struct inode *, struct inode *); -extern int fscrypt_inherit_context(struct inode *, struct inode *, - void *, bool); -/* keyinfo.c */ -extern int fscrypt_get_encryption_info(struct inode *); -extern void fscrypt_put_encryption_info(struct inode *, struct fscrypt_info *); - -/* fname.c */ -extern int fscrypt_setup_filename(struct inode *, const struct qstr *, - int lookup, struct fscrypt_name *); -extern void fscrypt_free_filename(struct fscrypt_name *); -extern u32 fscrypt_fname_encrypted_size(struct inode *, u32); -extern int fscrypt_fname_alloc_buffer(struct inode *, u32, - struct fscrypt_str *); -extern void fscrypt_fname_free_buffer(struct fscrypt_str *); -extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32, - const struct fscrypt_str *, struct fscrypt_str *); -extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *, - struct fscrypt_str *); -#endif - -/* crypto.c */ -static inline struct fscrypt_ctx *fscrypt_notsupp_get_ctx(struct inode *i, - gfp_t f) -{ - return ERR_PTR(-EOPNOTSUPP); -} - -static inline void fscrypt_notsupp_release_ctx(struct fscrypt_ctx *c) -{ - return; -} - -static inline struct page *fscrypt_notsupp_encrypt_page(struct inode *i, - struct page *p, gfp_t f) -{ - return ERR_PTR(-EOPNOTSUPP); -} - -static inline int fscrypt_notsupp_decrypt_page(struct page *p) -{ - return -EOPNOTSUPP; -} - -static inline void fscrypt_notsupp_decrypt_bio_pages(struct fscrypt_ctx *c, - struct bio *b) -{ - return; -} - -static inline void fscrypt_notsupp_pullback_bio_page(struct page **p, bool b) -{ - return; -} - -static inline void fscrypt_notsupp_restore_control_page(struct page *p) -{ - return; -} - -static inline int fscrypt_notsupp_zeroout_range(struct inode *i, pgoff_t p, - sector_t s, unsigned int f) -{ - return -EOPNOTSUPP; -} - -/* policy.c */ -static inline int fscrypt_notsupp_process_policy(struct file *f, - const struct fscrypt_policy *p) -{ - return -EOPNOTSUPP; -} - -static inline int fscrypt_notsupp_get_policy(struct inode *i, - struct fscrypt_policy *p) -{ - return -EOPNOTSUPP; -} - -static inline int fscrypt_notsupp_has_permitted_context(struct inode *p, - struct inode *i) -{ - return 0; -} - -static inline int fscrypt_notsupp_inherit_context(struct inode *p, - struct inode *i, void *v, bool b) -{ - return -EOPNOTSUPP; -} - -/* keyinfo.c */ -static inline int fscrypt_notsupp_get_encryption_info(struct inode *i) -{ - return -EOPNOTSUPP; -} - -static inline void fscrypt_notsupp_put_encryption_info(struct inode *i, - struct fscrypt_info *f) -{ - return; -} - - /* fname.c */ -static inline int fscrypt_notsupp_setup_filename(struct inode *dir, - const struct qstr *iname, - int lookup, struct fscrypt_name *fname) -{ - if (dir->i_sb->s_cop->is_encrypted(dir)) - return -EOPNOTSUPP; - - memset(fname, 0, sizeof(struct fscrypt_name)); - fname->usr_fname = iname; - fname->disk_name.name = (unsigned char *)iname->name; - fname->disk_name.len = iname->len; - return 0; -} - -static inline void fscrypt_notsupp_free_filename(struct fscrypt_name *fname) -{ - return; -} - -static inline u32 fscrypt_notsupp_fname_encrypted_size(struct inode *i, u32 s) -{ - /* never happens */ - WARN_ON(1); - return 0; -} - -static inline int fscrypt_notsupp_fname_alloc_buffer(struct inode *inode, - u32 ilen, struct fscrypt_str *crypto_str) -{ - return -EOPNOTSUPP; -} - -static inline void fscrypt_notsupp_fname_free_buffer(struct fscrypt_str *c) -{ - return; -} - -static inline int fscrypt_notsupp_fname_disk_to_usr(struct inode *inode, - u32 hash, u32 minor_hash, - const struct fscrypt_str *iname, - struct fscrypt_str *oname) -{ - return -EOPNOTSUPP; -} - -static inline int fscrypt_notsupp_fname_usr_to_disk(struct inode *inode, - const struct qstr *iname, - struct fscrypt_str *oname) -{ - return -EOPNOTSUPP; -} -#endif /* _LINUX_FSCRYPTO_H */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b3d34d3e0e7e..f4c0d36baffb 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -734,7 +734,8 @@ static inline unsigned long get_lock_parent_ip(void) static inline void time_hardirqs_off(unsigned long a0, unsigned long a1) { } #endif -#ifdef CONFIG_PREEMPT_TRACER +#if defined(CONFIG_PREEMPT_TRACER) || \ + (defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_PREEMPTIRQ_EVENTS)) extern void trace_preempt_on(unsigned long a0, unsigned long a1); extern void trace_preempt_off(unsigned long a0, unsigned long a1); #else diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index d596a076da11..8cc99de27bc2 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1521,11 +1521,11 @@ static inline void hv_signal_on_read(struct vmbus_channel *channel) cur_write_sz = hv_get_bytes_to_write(rbi); - if (cur_write_sz < pending_sz) + if (cur_write_sz <= pending_sz) return; cached_write_sz = hv_get_cached_bytes_to_write(rbi); - if (cached_write_sz < pending_sz) + if (cached_write_sz <= pending_sz) vmbus_setevent(channel); return; diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index e7fdec4db9da..6cc48ac55fd2 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -111,6 +111,9 @@ int ad_sd_write_reg(struct ad_sigma_delta *sigma_delta, unsigned int reg, int ad_sd_read_reg(struct ad_sigma_delta *sigma_delta, unsigned int reg, unsigned int size, unsigned int *val); +int ad_sd_reset(struct ad_sigma_delta *sigma_delta, + unsigned int reset_length); + int ad_sigma_delta_single_conversion(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, int *val); int ad_sd_calibrate_all(struct ad_sigma_delta *sigma_delta, diff --git a/include/linux/kbuild.h b/include/linux/kbuild.h index 22a72198c14b..4e80f3a9ad58 100644 --- a/include/linux/kbuild.h +++ b/include/linux/kbuild.h @@ -2,14 +2,14 @@ #define __LINUX_KBUILD_H #define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val)) -#define BLANK() asm volatile("\n->" : : ) +#define BLANK() asm volatile("\n.ascii \"->\"" : : ) #define OFFSET(sym, str, mem) \ DEFINE(sym, offsetof(struct str, mem)) #define COMMENT(x) \ - asm volatile("\n->#" x) + asm volatile("\n.ascii \"->#" x "\"") #endif diff --git a/include/linux/key.h b/include/linux/key.h index 722914798f37..7e2d14379f49 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -126,6 +126,11 @@ static inline bool is_key_possessed(const key_ref_t key_ref) return (unsigned long) key_ref & 1UL; } +enum key_state { + KEY_IS_UNINSTANTIATED, + KEY_IS_POSITIVE, /* Positively instantiated */ +}; + /*****************************************************************************/ /* * authentication token / access credential / keyring @@ -157,6 +162,7 @@ struct key { * - may not match RCU dereferenced payload * - payload should contain own length */ + short state; /* Key state (+) or rejection error (-) */ #ifdef KEY_DEBUGGING unsigned magic; @@ -165,17 +171,16 @@ struct key { #endif unsigned long flags; /* status flags (change with bitops) */ -#define KEY_FLAG_INSTANTIATED 0 /* set if key has been instantiated */ -#define KEY_FLAG_DEAD 1 /* set if key type has been deleted */ -#define KEY_FLAG_REVOKED 2 /* set if key had been revoked */ -#define KEY_FLAG_IN_QUOTA 3 /* set if key consumes quota */ -#define KEY_FLAG_USER_CONSTRUCT 4 /* set if key is being constructed in userspace */ -#define KEY_FLAG_NEGATIVE 5 /* set if key is negative */ -#define KEY_FLAG_ROOT_CAN_CLEAR 6 /* set if key can be cleared by root without permission */ -#define KEY_FLAG_INVALIDATED 7 /* set if key has been invalidated */ -#define KEY_FLAG_BUILTIN 8 /* set if key is built in to the kernel */ -#define KEY_FLAG_ROOT_CAN_INVAL 9 /* set if key can be invalidated by root without permission */ -#define KEY_FLAG_KEEP 10 /* set if key should not be removed */ +#define KEY_FLAG_DEAD 0 /* set if key type has been deleted */ +#define KEY_FLAG_REVOKED 1 /* set if key had been revoked */ +#define KEY_FLAG_IN_QUOTA 2 /* set if key consumes quota */ +#define KEY_FLAG_USER_CONSTRUCT 3 /* set if key is being constructed in userspace */ +#define KEY_FLAG_ROOT_CAN_CLEAR 4 /* set if key can be cleared by root without permission */ +#define KEY_FLAG_INVALIDATED 5 /* set if key has been invalidated */ +#define KEY_FLAG_BUILTIN 6 /* set if key is built in to the kernel */ +#define KEY_FLAG_ROOT_CAN_INVAL 7 /* set if key can be invalidated by root without permission */ +#define KEY_FLAG_KEEP 8 /* set if key should not be removed */ +#define KEY_FLAG_UID_KEYRING 9 /* set if key is a user or user session keyring */ /* the key type and key description string * - the desc is used to match a key against search criteria @@ -201,7 +206,6 @@ struct key { struct list_head name_link; struct assoc_array keys; }; - int reject_error; }; /* This is set on a keyring to restrict the addition of a link to a key @@ -235,6 +239,7 @@ extern struct key *key_alloc(struct key_type *type, #define KEY_ALLOC_NOT_IN_QUOTA 0x0002 /* not in quota */ #define KEY_ALLOC_BUILT_IN 0x0004 /* Key is built into kernel */ #define KEY_ALLOC_BYPASS_RESTRICTION 0x0008 /* Override the check on restricted keyrings */ +#define KEY_ALLOC_UID_KEYRING 0x0010 /* allocating a user or user session keyring */ extern void key_revoke(struct key *key); extern void key_invalidate(struct key *key); @@ -341,20 +346,33 @@ extern void key_set_timeout(struct key *, unsigned); #define KEY_NEED_SETATTR 0x20 /* Require permission to change attributes */ #define KEY_NEED_ALL 0x3f /* All the above permissions */ +static inline short key_read_state(const struct key *key) +{ + /* Barrier versus mark_key_instantiated(). */ + return smp_load_acquire(&key->state); +} + /** - * key_is_instantiated - Determine if a key has been positively instantiated + * key_is_positive - Determine if a key has been positively instantiated * @key: The key to check. * * Return true if the specified key has been positively instantiated, false * otherwise. */ -static inline bool key_is_instantiated(const struct key *key) +static inline bool key_is_positive(const struct key *key) { - return test_bit(KEY_FLAG_INSTANTIATED, &key->flags) && - !test_bit(KEY_FLAG_NEGATIVE, &key->flags); + return key_read_state(key) == KEY_IS_POSITIVE; } -#define rcu_dereference_key(KEY) \ +static inline bool key_is_negative(const struct key *key) +{ + return key_read_state(key) < 0; +} + +#define dereference_key_rcu(KEY) \ + (rcu_dereference((KEY)->payload.rcu_data0)) + +#define dereference_key_locked(KEY) \ (rcu_dereference_protected((KEY)->payload.rcu_data0, \ rwsem_is_locked(&((struct key *)(KEY))->sem))) diff --git a/include/linux/llist.h b/include/linux/llist.h index fd4ca0b4fe0f..ac6796138ba0 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -87,6 +87,23 @@ static inline void init_llist_head(struct llist_head *list) #define llist_entry(ptr, type, member) \ container_of(ptr, type, member) +/** + * member_address_is_nonnull - check whether the member address is not NULL + * @ptr: the object pointer (struct type * that contains the llist_node) + * @member: the name of the llist_node within the struct. + * + * This macro is conceptually the same as + * &ptr->member != NULL + * but it works around the fact that compilers can decide that taking a member + * address is never a NULL pointer. + * + * Real objects that start at a high address and have a member at NULL are + * unlikely to exist, but such pointers may be returned e.g. by the + * container_of() macro. + */ +#define member_address_is_nonnull(ptr, member) \ + ((uintptr_t)(ptr) + offsetof(typeof(*(ptr)), member) != 0) + /** * llist_for_each - iterate over some deleted entries of a lock-less list * @pos: the &struct llist_node to use as a loop cursor @@ -121,7 +138,7 @@ static inline void init_llist_head(struct llist_head *list) */ #define llist_for_each_entry(pos, node, member) \ for ((pos) = llist_entry((node), typeof(*(pos)), member); \ - &(pos)->member != NULL; \ + member_address_is_nonnull(pos, member); \ (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member)) /** @@ -143,7 +160,7 @@ static inline void init_llist_head(struct llist_head *list) */ #define llist_for_each_entry_safe(pos, n, node, member) \ for (pos = llist_entry((node), typeof(*pos), member); \ - &pos->member != NULL && \ + member_address_is_nonnull(pos, member) && \ (n = llist_entry(pos->member.next, typeof(*n), member), true); \ pos = n) diff --git a/include/linux/mbus.h b/include/linux/mbus.h index 2931aa43dab1..f70420e65ad7 100644 --- a/include/linux/mbus.h +++ b/include/linux/mbus.h @@ -31,8 +31,8 @@ struct mbus_dram_target_info struct mbus_dram_window { u8 cs_index; u8 mbus_attr; - u32 base; - u32 size; + u64 base; + u64 size; } cs[4]; }; diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h index d0a69e71b8ab..f563bcf55e7f 100644 --- a/include/linux/mmc/sdio_func.h +++ b/include/linux/mmc/sdio_func.h @@ -61,7 +61,7 @@ struct sdio_func { unsigned int state; /* function state */ #define SDIO_STATE_PRESENT (1<<0) /* present in sysfs */ - u8 tmpbuf[4]; /* DMA:able scratch buffer */ + u8 *tmpbuf; /* DMA:able scratch buffer */ unsigned num_info; /* number of info strings */ const char **info; /* info strings */ diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index a1a210d59961..25c0dc31f084 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -419,6 +419,11 @@ extern void mmu_notifier_synchronize(void); #else /* CONFIG_MMU_NOTIFIER */ +static inline int mm_has_notifiers(struct mm_struct *mm) +{ + return 0; +} + static inline void mmu_notifier_release(struct mm_struct *mm) { } diff --git a/include/linux/module.h b/include/linux/module.h index 0c3207d26ac0..1226280be63b 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -127,13 +127,13 @@ extern void cleanup_module(void); /* Each module must use one module_init(). */ #define module_init(initfn) \ - static inline initcall_t __inittest(void) \ + static inline initcall_t __maybe_unused __inittest(void) \ { return initfn; } \ int init_module(void) __attribute__((alias(#initfn))); /* This is only required if you want to be unloadable. */ #define module_exit(exitfn) \ - static inline exitcall_t __exittest(void) \ + static inline exitcall_t __maybe_unused __exittest(void) \ { return exitfn; } \ void cleanup_module(void) __attribute__((alias(#exitfn))); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 7321ae933867..102c84dcc11a 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -470,6 +470,7 @@ void svc_pool_map_put(void); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, struct svc_serv_ops *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); +int svc_set_num_threads_sync(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); void svc_destroy(struct svc_serv *); void svc_shutdown_net(struct svc_serv *, struct net *); diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index be007610ceb0..ba57266d9e80 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -273,6 +273,7 @@ struct trace_event_call { int perf_refcount; struct hlist_head __percpu *perf_events; struct bpf_prog *prog; + struct perf_event *bpf_prog_owner; int (*perf_perm)(struct trace_event_call *, struct perf_event *); diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h index c28dd523f96e..d43837f2ce3a 100644 --- a/include/linux/tty_flip.h +++ b/include/linux/tty_flip.h @@ -12,6 +12,7 @@ extern int tty_prepare_flip_string(struct tty_port *port, unsigned char **chars, size_t size); extern void tty_flip_buffer_push(struct tty_port *port); void tty_schedule_flip(struct tty_port *port); +int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag); static inline int tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag) @@ -26,7 +27,7 @@ static inline int tty_insert_flip_char(struct tty_port *port, *char_buf_ptr(tb, tb->used++) = ch; return 1; } - return tty_insert_flip_string_flags(port, &ch, &flag, 1); + return __tty_insert_flip_char(port, ch, flag); } static inline int tty_insert_flip_string(struct tty_port *port, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e2dba93e374f..2c7d876e2a1a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -902,21 +902,10 @@ struct ieee80211_tx_info { unsigned long jiffies; }; /* NB: vif can be NULL for injected frames */ - union { - /* NB: vif can be NULL for injected frames */ - struct ieee80211_vif *vif; - - /* When packets are enqueued on txq it's easy - * to re-construct the vif pointer. There's no - * more space in tx_info so it can be used to - * store the necessary enqueue time for packet - * sojourn time computation. - */ - codel_time_t enqueue_time; - }; + struct ieee80211_vif *vif; struct ieee80211_key_conf *hw_key; u32 flags; - /* 4 bytes free */ + codel_time_t enqueue_time; } control; struct { u64 cookie; diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index d150b5066201..97983d1c05e4 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -9,12 +9,13 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, struct sk_buff *skb, const struct nf_hook_state *state) { + unsigned int flags = IP6_FH_F_AUTH; int protohdr, thoff = 0; unsigned short frag_off; nft_set_pktinfo(pkt, skb, state); - protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); + protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); if (protohdr < 0) { nft_set_pktinfo_proto_unspec(pkt, skb); return; @@ -32,6 +33,7 @@ __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, const struct nf_hook_state *state) { #if IS_ENABLED(CONFIG_IPV6) + unsigned int flags = IP6_FH_F_AUTH; struct ipv6hdr *ip6h, _ip6h; unsigned int thoff = 0; unsigned short frag_off; @@ -50,7 +52,7 @@ __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, if (pkt_len + sizeof(*ip6h) > skb->len) return -1; - protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); + protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); if (protohdr < 0) return -1; diff --git a/include/net/netlink.h b/include/net/netlink.h index 254a0fc01800..42adccd6739d 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -756,7 +756,10 @@ static inline int nla_parse_nested(struct nlattr *tb[], int maxtype, */ static inline int nla_put_u8(struct sk_buff *skb, int attrtype, u8 value) { - return nla_put(skb, attrtype, sizeof(u8), &value); + /* temporary variables to work around GCC PR81715 with asan-stack=1 */ + u8 tmp = value; + + return nla_put(skb, attrtype, sizeof(u8), &tmp); } /** @@ -767,7 +770,9 @@ static inline int nla_put_u8(struct sk_buff *skb, int attrtype, u8 value) */ static inline int nla_put_u16(struct sk_buff *skb, int attrtype, u16 value) { - return nla_put(skb, attrtype, sizeof(u16), &value); + u16 tmp = value; + + return nla_put(skb, attrtype, sizeof(u16), &tmp); } /** @@ -778,7 +783,9 @@ static inline int nla_put_u16(struct sk_buff *skb, int attrtype, u16 value) */ static inline int nla_put_be16(struct sk_buff *skb, int attrtype, __be16 value) { - return nla_put(skb, attrtype, sizeof(__be16), &value); + __be16 tmp = value; + + return nla_put(skb, attrtype, sizeof(__be16), &tmp); } /** @@ -789,7 +796,9 @@ static inline int nla_put_be16(struct sk_buff *skb, int attrtype, __be16 value) */ static inline int nla_put_net16(struct sk_buff *skb, int attrtype, __be16 value) { - return nla_put_be16(skb, attrtype | NLA_F_NET_BYTEORDER, value); + __be16 tmp = value; + + return nla_put_be16(skb, attrtype | NLA_F_NET_BYTEORDER, tmp); } /** @@ -800,7 +809,9 @@ static inline int nla_put_net16(struct sk_buff *skb, int attrtype, __be16 value) */ static inline int nla_put_le16(struct sk_buff *skb, int attrtype, __le16 value) { - return nla_put(skb, attrtype, sizeof(__le16), &value); + __le16 tmp = value; + + return nla_put(skb, attrtype, sizeof(__le16), &tmp); } /** @@ -811,7 +822,9 @@ static inline int nla_put_le16(struct sk_buff *skb, int attrtype, __le16 value) */ static inline int nla_put_u32(struct sk_buff *skb, int attrtype, u32 value) { - return nla_put(skb, attrtype, sizeof(u32), &value); + u32 tmp = value; + + return nla_put(skb, attrtype, sizeof(u32), &tmp); } /** @@ -822,7 +835,9 @@ static inline int nla_put_u32(struct sk_buff *skb, int attrtype, u32 value) */ static inline int nla_put_be32(struct sk_buff *skb, int attrtype, __be32 value) { - return nla_put(skb, attrtype, sizeof(__be32), &value); + __be32 tmp = value; + + return nla_put(skb, attrtype, sizeof(__be32), &tmp); } /** @@ -833,7 +848,9 @@ static inline int nla_put_be32(struct sk_buff *skb, int attrtype, __be32 value) */ static inline int nla_put_net32(struct sk_buff *skb, int attrtype, __be32 value) { - return nla_put_be32(skb, attrtype | NLA_F_NET_BYTEORDER, value); + __be32 tmp = value; + + return nla_put_be32(skb, attrtype | NLA_F_NET_BYTEORDER, tmp); } /** @@ -844,7 +861,9 @@ static inline int nla_put_net32(struct sk_buff *skb, int attrtype, __be32 value) */ static inline int nla_put_le32(struct sk_buff *skb, int attrtype, __le32 value) { - return nla_put(skb, attrtype, sizeof(__le32), &value); + __le32 tmp = value; + + return nla_put(skb, attrtype, sizeof(__le32), &tmp); } /** @@ -857,7 +876,9 @@ static inline int nla_put_le32(struct sk_buff *skb, int attrtype, __le32 value) static inline int nla_put_u64_64bit(struct sk_buff *skb, int attrtype, u64 value, int padattr) { - return nla_put_64bit(skb, attrtype, sizeof(u64), &value, padattr); + u64 tmp = value; + + return nla_put_64bit(skb, attrtype, sizeof(u64), &tmp, padattr); } /** @@ -870,7 +891,9 @@ static inline int nla_put_u64_64bit(struct sk_buff *skb, int attrtype, static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value, int padattr) { - return nla_put_64bit(skb, attrtype, sizeof(__be64), &value, padattr); + __be64 tmp = value; + + return nla_put_64bit(skb, attrtype, sizeof(__be64), &tmp, padattr); } /** @@ -883,7 +906,9 @@ static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value, static inline int nla_put_net64(struct sk_buff *skb, int attrtype, __be64 value, int padattr) { - return nla_put_be64(skb, attrtype | NLA_F_NET_BYTEORDER, value, + __be64 tmp = value; + + return nla_put_be64(skb, attrtype | NLA_F_NET_BYTEORDER, tmp, padattr); } @@ -897,7 +922,9 @@ static inline int nla_put_net64(struct sk_buff *skb, int attrtype, __be64 value, static inline int nla_put_le64(struct sk_buff *skb, int attrtype, __le64 value, int padattr) { - return nla_put_64bit(skb, attrtype, sizeof(__le64), &value, padattr); + __le64 tmp = value; + + return nla_put_64bit(skb, attrtype, sizeof(__le64), &tmp, padattr); } /** @@ -908,7 +935,9 @@ static inline int nla_put_le64(struct sk_buff *skb, int attrtype, __le64 value, */ static inline int nla_put_s8(struct sk_buff *skb, int attrtype, s8 value) { - return nla_put(skb, attrtype, sizeof(s8), &value); + s8 tmp = value; + + return nla_put(skb, attrtype, sizeof(s8), &tmp); } /** @@ -919,7 +948,9 @@ static inline int nla_put_s8(struct sk_buff *skb, int attrtype, s8 value) */ static inline int nla_put_s16(struct sk_buff *skb, int attrtype, s16 value) { - return nla_put(skb, attrtype, sizeof(s16), &value); + s16 tmp = value; + + return nla_put(skb, attrtype, sizeof(s16), &tmp); } /** @@ -930,7 +961,9 @@ static inline int nla_put_s16(struct sk_buff *skb, int attrtype, s16 value) */ static inline int nla_put_s32(struct sk_buff *skb, int attrtype, s32 value) { - return nla_put(skb, attrtype, sizeof(s32), &value); + s32 tmp = value; + + return nla_put(skb, attrtype, sizeof(s32), &tmp); } /** @@ -943,7 +976,9 @@ static inline int nla_put_s32(struct sk_buff *skb, int attrtype, s32 value) static inline int nla_put_s64(struct sk_buff *skb, int attrtype, s64 value, int padattr) { - return nla_put_64bit(skb, attrtype, sizeof(s64), &value, padattr); + s64 tmp = value; + + return nla_put_64bit(skb, attrtype, sizeof(s64), &tmp, padattr); } /** @@ -993,7 +1028,9 @@ static inline int nla_put_msecs(struct sk_buff *skb, int attrtype, static inline int nla_put_in_addr(struct sk_buff *skb, int attrtype, __be32 addr) { - return nla_put_be32(skb, attrtype, addr); + __be32 tmp = addr; + + return nla_put_be32(skb, attrtype, tmp); } /** diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index 2c098cd7e7e2..231df4fc8423 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -141,8 +141,12 @@ __u16 sctp_ulpevent_get_notification_type(const struct sctp_ulpevent *event); static inline int sctp_ulpevent_type_enabled(__u16 sn_type, struct sctp_event_subscribe *mask) { + int offset = sn_type - SCTP_SN_TYPE_BASE; char *amask = (char *) mask; - return amask[sn_type - SCTP_SN_TYPE_BASE]; + + if (offset >= sizeof(struct sctp_event_subscribe)) + return 0; + return amask[offset]; } /* Given an event subscription, is this event enabled? */ diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 835c30e491c8..9b6e6a46aed9 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -155,6 +155,7 @@ struct xfrm_state { int header_len; int trailer_len; u32 extra_flags; + u32 output_mark; } props; struct xfrm_lifetime_cfg lft; @@ -284,10 +285,12 @@ struct xfrm_policy_afinfo { struct dst_entry *(*dst_lookup)(struct net *net, int tos, int oif, const xfrm_address_t *saddr, - const xfrm_address_t *daddr); + const xfrm_address_t *daddr, + u32 mark); int (*get_saddr)(struct net *net, int oif, xfrm_address_t *saddr, - xfrm_address_t *daddr); + xfrm_address_t *daddr, + u32 mark); void (*decode_session)(struct sk_buff *skb, struct flowi *fl, int reverse); diff --git a/include/sound/seq_virmidi.h b/include/sound/seq_virmidi.h index a03acd0d398a..695257ae64ac 100644 --- a/include/sound/seq_virmidi.h +++ b/include/sound/seq_virmidi.h @@ -60,6 +60,7 @@ struct snd_virmidi_dev { int port; /* created/attached port */ unsigned int flags; /* SNDRV_VIRMIDI_* */ rwlock_t filelist_lock; + struct rw_semaphore filelist_sem; struct list_head filelist; }; diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 903a09165bb1..94fbc2776364 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -6,8 +6,8 @@ #include -#define show_dev(entry) MAJOR(entry->dev), MINOR(entry->dev) -#define show_dev_ino(entry) show_dev(entry), (unsigned long)entry->ino +#define show_dev(dev) MAJOR(dev), MINOR(dev) +#define show_dev_ino(entry) show_dev(entry->dev), (unsigned long)entry->ino TRACE_DEFINE_ENUM(NODE); TRACE_DEFINE_ENUM(DATA); @@ -15,6 +15,8 @@ TRACE_DEFINE_ENUM(META); TRACE_DEFINE_ENUM(META_FLUSH); TRACE_DEFINE_ENUM(INMEM); TRACE_DEFINE_ENUM(INMEM_DROP); +TRACE_DEFINE_ENUM(INMEM_INVALIDATE); +TRACE_DEFINE_ENUM(INMEM_REVOKE); TRACE_DEFINE_ENUM(IPU); TRACE_DEFINE_ENUM(OPU); TRACE_DEFINE_ENUM(CURSEG_HOT_DATA); @@ -42,6 +44,7 @@ TRACE_DEFINE_ENUM(CP_FASTBOOT); TRACE_DEFINE_ENUM(CP_SYNC); TRACE_DEFINE_ENUM(CP_RECOVERY); TRACE_DEFINE_ENUM(CP_DISCARD); +TRACE_DEFINE_ENUM(CP_TRIMMED); #define show_block_type(type) \ __print_symbolic(type, \ @@ -51,32 +54,40 @@ TRACE_DEFINE_ENUM(CP_DISCARD); { META_FLUSH, "META_FLUSH" }, \ { INMEM, "INMEM" }, \ { INMEM_DROP, "INMEM_DROP" }, \ + { INMEM_INVALIDATE, "INMEM_INVALIDATE" }, \ { INMEM_REVOKE, "INMEM_REVOKE" }, \ { IPU, "IN-PLACE" }, \ { OPU, "OUT-OF-PLACE" }) -#define F2FS_BIO_FLAG_MASK(t) (t & (REQ_RAHEAD | WRITE_FLUSH_FUA)) -#define F2FS_BIO_EXTRA_MASK(t) (t & (REQ_META | REQ_PRIO)) +#define F2FS_OP_FLAGS (REQ_RAHEAD | REQ_SYNC | REQ_META | REQ_PRIO | \ + REQ_PREFLUSH | REQ_FUA) +#define F2FS_BIO_FLAG_MASK(t) (t & F2FS_OP_FLAGS) -#define show_bio_type(op_flags) show_bio_op_flags(op_flags), \ - show_bio_extra(op_flags) +#define show_bio_type(op,op_flags) show_bio_op(op), \ + show_bio_op_flags(op_flags) + +#define show_bio_op(op) \ + __print_symbolic(op, \ + { REQ_OP_READ, "READ" }, \ + { REQ_OP_WRITE, "WRITE" }, \ + { REQ_OP_DISCARD, "DISCARD" }, \ + { REQ_OP_SECURE_ERASE, "SECURE_ERASE" }, \ + { REQ_OP_WRITE_SAME, "WRITE_SAME" }) #define show_bio_op_flags(flags) \ - __print_symbolic(F2FS_BIO_FLAG_MASK(flags), \ - { 0, "WRITE" }, \ - { REQ_RAHEAD, "READAHEAD" }, \ - { READ_SYNC, "READ_SYNC" }, \ - { WRITE_SYNC, "WRITE_SYNC" }, \ - { WRITE_FLUSH, "WRITE_FLUSH" }, \ - { WRITE_FUA, "WRITE_FUA" }, \ - { WRITE_FLUSH_FUA, "WRITE_FLUSH_FUA" }) + __print_flags(F2FS_BIO_FLAG_MASK(flags), "|", \ + { REQ_RAHEAD, "R" }, \ + { REQ_SYNC, "S" }, \ + { REQ_META, "M" }, \ + { REQ_PRIO, "P" }, \ + { REQ_PREFLUSH, "PF" }, \ + { REQ_FUA, "FUA" }) -#define show_bio_extra(type) \ - __print_symbolic(F2FS_BIO_EXTRA_MASK(type), \ - { REQ_META, "(M)" }, \ - { REQ_PRIO, "(P)" }, \ - { REQ_META | REQ_PRIO, "(MP)" }, \ - { 0, " \b" }) +#define show_block_temp(temp) \ + __print_symbolic(temp, \ + { HOT, "HOT" }, \ + { WARM, "WARM" }, \ + { COLD, "COLD" }) #define show_data_type(type) \ __print_symbolic(type, \ @@ -109,12 +120,14 @@ TRACE_DEFINE_ENUM(CP_DISCARD); { GC_CB, "Cost-Benefit" }) #define show_cpreason(type) \ - __print_symbolic(type, \ + __print_flags(type, "|", \ { CP_UMOUNT, "Umount" }, \ { CP_FASTBOOT, "Fastboot" }, \ { CP_SYNC, "Sync" }, \ { CP_RECOVERY, "Recovery" }, \ - { CP_DISCARD, "Discard" }) + { CP_DISCARD, "Discard" }, \ + { CP_UMOUNT, "Umount" }, \ + { CP_TRIMMED, "Trimmed" }) struct victim_sel_policy; struct f2fs_map_blocks; @@ -237,7 +250,7 @@ TRACE_EVENT(f2fs_sync_fs, ), TP_printk("dev = (%d,%d), superblock is %s, wait = %d", - show_dev(__entry), + show_dev(__entry->dev), __entry->dirty ? "dirty" : "not dirty", __entry->wait) ); @@ -307,6 +320,13 @@ DEFINE_EVENT(f2fs__inode_exit, f2fs_unlink_exit, TP_ARGS(inode, ret) ); +DEFINE_EVENT(f2fs__inode_exit, f2fs_drop_inode, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret) +); + DEFINE_EVENT(f2fs__inode, f2fs_truncate, TP_PROTO(struct inode *inode), @@ -516,14 +536,14 @@ TRACE_EVENT(f2fs_map_blocks, TRACE_EVENT(f2fs_background_gc, - TP_PROTO(struct super_block *sb, long wait_ms, + TP_PROTO(struct super_block *sb, unsigned int wait_ms, unsigned int prefree, unsigned int free), TP_ARGS(sb, wait_ms, prefree, free), TP_STRUCT__entry( __field(dev_t, dev) - __field(long, wait_ms) + __field(unsigned int, wait_ms) __field(unsigned int, prefree) __field(unsigned int, free) ), @@ -535,13 +555,120 @@ TRACE_EVENT(f2fs_background_gc, __entry->free = free; ), - TP_printk("dev = (%d,%d), wait_ms = %ld, prefree = %u, free = %u", - show_dev(__entry), + TP_printk("dev = (%d,%d), wait_ms = %u, prefree = %u, free = %u", + show_dev(__entry->dev), __entry->wait_ms, __entry->prefree, __entry->free) ); +TRACE_EVENT(f2fs_gc_begin, + + TP_PROTO(struct super_block *sb, bool sync, bool background, + long long dirty_nodes, long long dirty_dents, + long long dirty_imeta, unsigned int free_sec, + unsigned int free_seg, int reserved_seg, + unsigned int prefree_seg), + + TP_ARGS(sb, sync, background, dirty_nodes, dirty_dents, dirty_imeta, + free_sec, free_seg, reserved_seg, prefree_seg), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(bool, sync) + __field(bool, background) + __field(long long, dirty_nodes) + __field(long long, dirty_dents) + __field(long long, dirty_imeta) + __field(unsigned int, free_sec) + __field(unsigned int, free_seg) + __field(int, reserved_seg) + __field(unsigned int, prefree_seg) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->sync = sync; + __entry->background = background; + __entry->dirty_nodes = dirty_nodes; + __entry->dirty_dents = dirty_dents; + __entry->dirty_imeta = dirty_imeta; + __entry->free_sec = free_sec; + __entry->free_seg = free_seg; + __entry->reserved_seg = reserved_seg; + __entry->prefree_seg = prefree_seg; + ), + + TP_printk("dev = (%d,%d), sync = %d, background = %d, nodes = %lld, " + "dents = %lld, imeta = %lld, free_sec:%u, free_seg:%u, " + "rsv_seg:%d, prefree_seg:%u", + show_dev(__entry->dev), + __entry->sync, + __entry->background, + __entry->dirty_nodes, + __entry->dirty_dents, + __entry->dirty_imeta, + __entry->free_sec, + __entry->free_seg, + __entry->reserved_seg, + __entry->prefree_seg) +); + +TRACE_EVENT(f2fs_gc_end, + + TP_PROTO(struct super_block *sb, int ret, int seg_freed, + int sec_freed, long long dirty_nodes, + long long dirty_dents, long long dirty_imeta, + unsigned int free_sec, unsigned int free_seg, + int reserved_seg, unsigned int prefree_seg), + + TP_ARGS(sb, ret, seg_freed, sec_freed, dirty_nodes, dirty_dents, + dirty_imeta, free_sec, free_seg, reserved_seg, prefree_seg), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, ret) + __field(int, seg_freed) + __field(int, sec_freed) + __field(long long, dirty_nodes) + __field(long long, dirty_dents) + __field(long long, dirty_imeta) + __field(unsigned int, free_sec) + __field(unsigned int, free_seg) + __field(int, reserved_seg) + __field(unsigned int, prefree_seg) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->ret = ret; + __entry->seg_freed = seg_freed; + __entry->sec_freed = sec_freed; + __entry->dirty_nodes = dirty_nodes; + __entry->dirty_dents = dirty_dents; + __entry->dirty_imeta = dirty_imeta; + __entry->free_sec = free_sec; + __entry->free_seg = free_seg; + __entry->reserved_seg = reserved_seg; + __entry->prefree_seg = prefree_seg; + ), + + TP_printk("dev = (%d,%d), ret = %d, seg_freed = %d, sec_freed = %d, " + "nodes = %lld, dents = %lld, imeta = %lld, free_sec:%u, " + "free_seg:%u, rsv_seg:%d, prefree_seg:%u", + show_dev(__entry->dev), + __entry->ret, + __entry->seg_freed, + __entry->sec_freed, + __entry->dirty_nodes, + __entry->dirty_dents, + __entry->dirty_imeta, + __entry->free_sec, + __entry->free_seg, + __entry->reserved_seg, + __entry->prefree_seg) +); + TRACE_EVENT(f2fs_get_victim, TP_PROTO(struct super_block *sb, int type, int gc_type, @@ -557,6 +684,7 @@ TRACE_EVENT(f2fs_get_victim, __field(int, alloc_mode) __field(int, gc_mode) __field(unsigned int, victim) + __field(unsigned int, cost) __field(unsigned int, ofs_unit) __field(unsigned int, pre_victim) __field(unsigned int, prefree) @@ -570,20 +698,23 @@ TRACE_EVENT(f2fs_get_victim, __entry->alloc_mode = p->alloc_mode; __entry->gc_mode = p->gc_mode; __entry->victim = p->min_segno; + __entry->cost = p->min_cost; __entry->ofs_unit = p->ofs_unit; __entry->pre_victim = pre_victim; __entry->prefree = prefree; __entry->free = free; ), - TP_printk("dev = (%d,%d), type = %s, policy = (%s, %s, %s), victim = %u " - "ofs_unit = %u, pre_victim_secno = %d, prefree = %u, free = %u", - show_dev(__entry), + TP_printk("dev = (%d,%d), type = %s, policy = (%s, %s, %s), " + "victim = %u, cost = %u, ofs_unit = %u, " + "pre_victim_secno = %d, prefree = %u, free = %u", + show_dev(__entry->dev), show_data_type(__entry->type), show_gc_type(__entry->gc_type), show_alloc_mode(__entry->alloc_mode), show_victim_policy(__entry->gc_mode), __entry->victim, + __entry->cost, __entry->ofs_unit, (int)__entry->pre_victim, __entry->prefree, @@ -715,7 +846,7 @@ TRACE_EVENT(f2fs_reserve_new_blocks, ), TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u, count = %llu", - show_dev(__entry), + show_dev(__entry->dev), (unsigned int)__entry->nid, __entry->ofs_in_node, (unsigned long long)__entry->count) @@ -735,6 +866,7 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio, __field(block_t, new_blkaddr) __field(int, op) __field(int, op_flags) + __field(int, temp) __field(int, type) ), @@ -746,16 +878,18 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio, __entry->new_blkaddr = fio->new_blkaddr; __entry->op = fio->op; __entry->op_flags = fio->op_flags; + __entry->temp = fio->temp; __entry->type = fio->type; ), TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " - "oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s%s, type = %s", + "oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s(%s), type = %s_%s", show_dev_ino(__entry), (unsigned long)__entry->index, (unsigned long long)__entry->old_blkaddr, (unsigned long long)__entry->new_blkaddr, - show_bio_type(__entry->op_flags), + show_bio_type(__entry->op, __entry->op_flags), + show_block_temp(__entry->temp), show_block_type(__entry->type)) ); @@ -768,7 +902,7 @@ DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_bio, TP_CONDITION(page->mapping) ); -DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_mbio, +DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_write, TP_PROTO(struct page *page, struct f2fs_io_info *fio), @@ -777,15 +911,15 @@ DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_mbio, TP_CONDITION(page->mapping) ); -DECLARE_EVENT_CLASS(f2fs__submit_bio, +DECLARE_EVENT_CLASS(f2fs__bio, - TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio, - struct bio *bio), + TP_PROTO(struct super_block *sb, int type, struct bio *bio), - TP_ARGS(sb, fio, bio), + TP_ARGS(sb, type, bio), TP_STRUCT__entry( __field(dev_t, dev) + __field(dev_t, target) __field(int, op) __field(int, op_flags) __field(int, type) @@ -795,37 +929,55 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio, TP_fast_assign( __entry->dev = sb->s_dev; - __entry->op = fio->op; - __entry->op_flags = fio->op_flags; - __entry->type = fio->type; + __entry->target = bio->bi_bdev->bd_dev; + __entry->op = bio_op(bio); + __entry->op_flags = bio->bi_opf; + __entry->type = type; __entry->sector = bio->bi_iter.bi_sector; __entry->size = bio->bi_iter.bi_size; ), - TP_printk("dev = (%d,%d), rw = %s%s, %s, sector = %lld, size = %u", - show_dev(__entry), - show_bio_type(__entry->op_flags), + TP_printk("dev = (%d,%d)/(%d,%d), rw = %s(%s), %s, sector = %lld, size = %u", + show_dev(__entry->target), + show_dev(__entry->dev), + show_bio_type(__entry->op, __entry->op_flags), show_block_type(__entry->type), (unsigned long long)__entry->sector, __entry->size) ); -DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_write_bio, +DEFINE_EVENT_CONDITION(f2fs__bio, f2fs_prepare_write_bio, - TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio, - struct bio *bio), + TP_PROTO(struct super_block *sb, int type, struct bio *bio), - TP_ARGS(sb, fio, bio), + TP_ARGS(sb, type, bio), TP_CONDITION(bio) ); -DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_read_bio, +DEFINE_EVENT_CONDITION(f2fs__bio, f2fs_prepare_read_bio, - TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio, - struct bio *bio), + TP_PROTO(struct super_block *sb, int type, struct bio *bio), - TP_ARGS(sb, fio, bio), + TP_ARGS(sb, type, bio), + + TP_CONDITION(bio) +); + +DEFINE_EVENT_CONDITION(f2fs__bio, f2fs_submit_read_bio, + + TP_PROTO(struct super_block *sb, int type, struct bio *bio), + + TP_ARGS(sb, type, bio), + + TP_CONDITION(bio) +); + +DEFINE_EVENT_CONDITION(f2fs__bio, f2fs_submit_write_bio, + + TP_PROTO(struct super_block *sb, int type, struct bio *bio), + + TP_ARGS(sb, type, bio), TP_CONDITION(bio) ); @@ -1084,16 +1236,16 @@ TRACE_EVENT(f2fs_write_checkpoint, ), TP_printk("dev = (%d,%d), checkpoint for %s, state = %s", - show_dev(__entry), + show_dev(__entry->dev), show_cpreason(__entry->reason), __entry->msg) ); -TRACE_EVENT(f2fs_issue_discard, +DECLARE_EVENT_CLASS(f2fs_discard, - TP_PROTO(struct super_block *sb, block_t blkstart, block_t blklen), + TP_PROTO(struct block_device *dev, block_t blkstart, block_t blklen), - TP_ARGS(sb, blkstart, blklen), + TP_ARGS(dev, blkstart, blklen), TP_STRUCT__entry( __field(dev_t, dev) @@ -1102,40 +1254,78 @@ TRACE_EVENT(f2fs_issue_discard, ), TP_fast_assign( - __entry->dev = sb->s_dev; + __entry->dev = dev->bd_dev; __entry->blkstart = blkstart; __entry->blklen = blklen; ), TP_printk("dev = (%d,%d), blkstart = 0x%llx, blklen = 0x%llx", - show_dev(__entry), + show_dev(__entry->dev), (unsigned long long)__entry->blkstart, (unsigned long long)__entry->blklen) ); +DEFINE_EVENT(f2fs_discard, f2fs_queue_discard, + + TP_PROTO(struct block_device *dev, block_t blkstart, block_t blklen), + + TP_ARGS(dev, blkstart, blklen) +); + +DEFINE_EVENT(f2fs_discard, f2fs_issue_discard, + + TP_PROTO(struct block_device *dev, block_t blkstart, block_t blklen), + + TP_ARGS(dev, blkstart, blklen) +); + +TRACE_EVENT(f2fs_issue_reset_zone, + + TP_PROTO(struct block_device *dev, block_t blkstart), + + TP_ARGS(dev, blkstart), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(block_t, blkstart) + ), + + TP_fast_assign( + __entry->dev = dev->bd_dev; + __entry->blkstart = blkstart; + ), + + TP_printk("dev = (%d,%d), reset zone at block = 0x%llx", + show_dev(__entry->dev), + (unsigned long long)__entry->blkstart) +); + TRACE_EVENT(f2fs_issue_flush, - TP_PROTO(struct super_block *sb, unsigned int nobarrier, - unsigned int flush_merge), + TP_PROTO(struct block_device *dev, unsigned int nobarrier, + unsigned int flush_merge, int ret), - TP_ARGS(sb, nobarrier, flush_merge), + TP_ARGS(dev, nobarrier, flush_merge, ret), TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned int, nobarrier) __field(unsigned int, flush_merge) + __field(int, ret) ), TP_fast_assign( - __entry->dev = sb->s_dev; + __entry->dev = dev->bd_dev; __entry->nobarrier = nobarrier; __entry->flush_merge = flush_merge; + __entry->ret = ret; ), - TP_printk("dev = (%d,%d), %s %s", - show_dev(__entry), + TP_printk("dev = (%d,%d), %s %s, ret = %d", + show_dev(__entry->dev), __entry->nobarrier ? "skip (nobarrier)" : "issue", - __entry->flush_merge ? " with flush_merge" : "") + __entry->flush_merge ? " with flush_merge" : "", + __entry->ret) ); TRACE_EVENT(f2fs_lookup_extent_tree_start, @@ -1248,7 +1438,7 @@ TRACE_EVENT(f2fs_shrink_extent_tree, ), TP_printk("dev = (%d,%d), shrunk: node_cnt = %u, tree_cnt = %u", - show_dev(__entry), + show_dev(__entry->dev), __entry->node_cnt, __entry->tree_cnt) ); @@ -1295,7 +1485,7 @@ DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes, ), TP_printk("dev = (%d,%d), %s, dirty count = %lld", - show_dev(__entry), + show_dev(__entry->dev), show_file_type(__entry->type), __entry->count) ); diff --git a/include/trace/events/preemptirq.h b/include/trace/events/preemptirq.h new file mode 100644 index 000000000000..f5024c560d8f --- /dev/null +++ b/include/trace/events/preemptirq.h @@ -0,0 +1,70 @@ +#ifdef CONFIG_PREEMPTIRQ_EVENTS + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM preemptirq + +#if !defined(_TRACE_PREEMPTIRQ_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PREEMPTIRQ_H + +#include +#include +#include +#include + +DECLARE_EVENT_CLASS(preemptirq_template, + + TP_PROTO(unsigned long ip, unsigned long parent_ip), + + TP_ARGS(ip, parent_ip), + + TP_STRUCT__entry( + __field(u32, caller_offs) + __field(u32, parent_offs) + ), + + TP_fast_assign( + __entry->caller_offs = (u32)(ip - (unsigned long)_stext); + __entry->parent_offs = (u32)(parent_ip - (unsigned long)_stext); + ), + + TP_printk("caller=%pF parent=%pF", + (void *)((unsigned long)(_stext) + __entry->caller_offs), + (void *)((unsigned long)(_stext) + __entry->parent_offs)) +); + +#ifndef CONFIG_PROVE_LOCKING +DEFINE_EVENT(preemptirq_template, irq_disable, + TP_PROTO(unsigned long ip, unsigned long parent_ip), + TP_ARGS(ip, parent_ip)); + +DEFINE_EVENT(preemptirq_template, irq_enable, + TP_PROTO(unsigned long ip, unsigned long parent_ip), + TP_ARGS(ip, parent_ip)); +#endif + +#ifdef CONFIG_DEBUG_PREEMPT +DEFINE_EVENT(preemptirq_template, preempt_disable, + TP_PROTO(unsigned long ip, unsigned long parent_ip), + TP_ARGS(ip, parent_ip)); + +DEFINE_EVENT(preemptirq_template, preempt_enable, + TP_PROTO(unsigned long ip, unsigned long parent_ip), + TP_ARGS(ip, parent_ip)); +#endif + +#endif /* _TRACE_PREEMPTIRQ_H */ + +#include + +#else /* !CONFIG_PREEMPTIRQ_EVENTS */ + +#define trace_irq_enable(...) +#define trace_irq_disable(...) +#define trace_preempt_enable(...) +#define trace_preempt_disable(...) +#define trace_irq_enable_rcuidle(...) +#define trace_irq_disable_rcuidle(...) +#define trace_preempt_enable_rcuidle(...) +#define trace_preempt_disable_rcuidle(...) + +#endif diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index a5890bf44c0a..d1601a621929 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -154,6 +154,7 @@ extern "C" { /* Vendor Ids: */ #define DRM_FORMAT_MOD_NONE 0 +#define DRM_FORMAT_MOD_VENDOR_NONE 0 #define DRM_FORMAT_MOD_VENDOR_INTEL 0x01 #define DRM_FORMAT_MOD_VENDOR_AMD 0x02 #define DRM_FORMAT_MOD_VENDOR_NV 0x03 diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 11c030782426..7683188755b1 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -257,18 +257,47 @@ struct fsxattr { /* Policy provided via an ioctl on the topmost directory */ #define FS_KEY_DESCRIPTOR_SIZE 8 +#define FS_POLICY_FLAGS_PAD_4 0x00 +#define FS_POLICY_FLAGS_PAD_8 0x01 +#define FS_POLICY_FLAGS_PAD_16 0x02 +#define FS_POLICY_FLAGS_PAD_32 0x03 +#define FS_POLICY_FLAGS_PAD_MASK 0x03 +#define FS_POLICY_FLAGS_VALID 0x03 + +/* Encryption algorithms */ +#define FS_ENCRYPTION_MODE_INVALID 0 +#define FS_ENCRYPTION_MODE_AES_256_XTS 1 +#define FS_ENCRYPTION_MODE_AES_256_GCM 2 +#define FS_ENCRYPTION_MODE_AES_256_CBC 3 +#define FS_ENCRYPTION_MODE_AES_256_CTS 4 +#define FS_ENCRYPTION_MODE_AES_128_CBC 5 +#define FS_ENCRYPTION_MODE_AES_128_CTS 6 + struct fscrypt_policy { __u8 version; __u8 contents_encryption_mode; __u8 filenames_encryption_mode; __u8 flags; __u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE]; -} __packed; +}; #define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy) #define FS_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16]) #define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy) +/* Parameters for passing an encryption key into the kernel keyring */ +#define FS_KEY_DESC_PREFIX "fscrypt:" +#define FS_KEY_DESC_PREFIX_SIZE 8 + +/* Structure that userspace passes to the kernel keyring */ +#define FS_MAX_KEY_SIZE 64 + +struct fscrypt_key { + __u32 mode; + __u8 raw[FS_MAX_KEY_SIZE]; + __u32 size; +}; + /* * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) * diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h index 5062fb5751e1..ed5721148768 100644 --- a/include/uapi/linux/mroute6.h +++ b/include/uapi/linux/mroute6.h @@ -4,6 +4,7 @@ #include #include #include +#include /* For struct sockaddr_in6. */ /* * Based on the MROUTING 3.5 defines primarily to keep diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h index 0f9265cb2a96..7af20a136429 100644 --- a/include/uapi/linux/rds.h +++ b/include/uapi/linux/rds.h @@ -35,6 +35,7 @@ #define _LINUX_RDS_H #include +#include /* For __kernel_sockaddr_storage. */ #define RDS_IB_ABI_VERSION 0x301 @@ -223,7 +224,7 @@ struct rds_get_mr_args { }; struct rds_get_mr_for_dest_args { - struct sockaddr_storage dest_addr; + struct __kernel_sockaddr_storage dest_addr; struct rds_iovec vec; uint64_t cookie_addr; uint64_t flags; diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index a8acc24765fe..5e64a86989a5 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -759,6 +759,7 @@ struct usb_interface_assoc_descriptor { __u8 iFunction; } __attribute__ ((packed)); +#define USB_DT_INTERFACE_ASSOCIATION_SIZE 8 /*-------------------------------------------------------------------------*/ diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 1fc62b239f1b..7d75e5633641 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -303,6 +303,8 @@ enum xfrm_attr_type_t { XFRMA_PROTO, /* __u8 */ XFRMA_ADDRESS_FILTER, /* struct xfrm_address_filter */ XFRMA_PAD, + XFRMA_OFFLOAD_DEV, /* struct xfrm_state_offload */ + XFRMA_OUTPUT_MARK, /* __u32 */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h index 7c35e279d1e3..683057f79dca 100644 --- a/include/xen/swiotlb-xen.h +++ b/include/xen/swiotlb-xen.h @@ -58,4 +58,9 @@ xen_swiotlb_dma_supported(struct device *hwdev, u64 mask); extern int xen_swiotlb_set_dma_mask(struct device *dev, u64 dma_mask); + +extern int +xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); #endif /* __LINUX_SWIOTLB_XEN_H */ diff --git a/init/initramfs.c b/init/initramfs.c index f8ce812ba43e..d0b53f49c98a 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -19,6 +19,7 @@ #include #include #include +#include static ssize_t __init xwrite(int fd, const char *p, size_t count) { @@ -664,6 +665,7 @@ static int __init populate_rootfs(void) printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err); free_initrd(); #endif + flush_delayed_fput(); /* * Try loading default modules from initramfs. This gives * us a chance to load before device_initcalls. diff --git a/init/main.c b/init/main.c index ae3996ae9bac..25bac88bc66e 100644 --- a/init/main.c +++ b/init/main.c @@ -70,7 +70,6 @@ #include #include #include -#include #include #include #include @@ -947,8 +946,6 @@ static int __ref kernel_init(void *unused) system_state = SYSTEM_RUNNING; numa_default_policy(); - flush_delayed_fput(); - rcu_end_inkernel_boot(); if (ramdisk_execute_command) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 779c871c5dcd..372454aa7f37 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1720,7 +1720,8 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) } } else { if (insn->src_reg != BPF_REG_0 || insn->off != 0 || - (insn->imm != 16 && insn->imm != 32 && insn->imm != 64)) { + (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) || + BPF_CLASS(insn->code) == BPF_ALU64) { verbose("BPF_END uses reserved fields\n"); return -EINVAL; } diff --git a/kernel/configs/android-fetch-configs.sh b/kernel/configs/android-fetch-configs.sh new file mode 100755 index 000000000000..a5b56d4908dc --- /dev/null +++ b/kernel/configs/android-fetch-configs.sh @@ -0,0 +1,4 @@ +#!/bin/sh + +curl https://android.googlesource.com/kernel/configs/+archive/master/android-4.9.tar.gz | tar xzv + diff --git a/kernel/cpuset.c b/kernel/cpuset.c index be12ac8d0ff1..194e2f24841b 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2289,6 +2289,13 @@ retry: mutex_unlock(&cpuset_mutex); } +static bool force_rebuild; + +void cpuset_force_rebuild(void) +{ + force_rebuild = true; +} + /** * cpuset_hotplug_workfn - handle CPU/memory hotunplug for a cpuset * @@ -2363,8 +2370,10 @@ static void cpuset_hotplug_workfn(struct work_struct *work) } /* rebuild sched domains if cpus_allowed has changed */ - if (cpus_updated) + if (cpus_updated || force_rebuild) { + force_rebuild = false; rebuild_sched_domains(); + } } void cpuset_update_active_cpus(bool cpu_online) @@ -2383,6 +2392,11 @@ void cpuset_update_active_cpus(bool cpu_online) schedule_work(&cpuset_hotplug_work); } +void cpuset_wait_for_hotplug(void) +{ + flush_work(&cpuset_hotplug_work); +} + /* * Keep top_cpuset.mems_allowed tracking node_states[N_MEMORY]. * Call this routine anytime after node_states[N_MEMORY] changes. diff --git a/kernel/events/core.c b/kernel/events/core.c index 2ecb8891b5a9..60a8da7cdfff 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7876,6 +7876,7 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) } } event->tp_event->prog = prog; + event->tp_event->bpf_prog_owner = event; return 0; } @@ -7890,7 +7891,7 @@ static void perf_event_free_bpf_prog(struct perf_event *event) return; prog = event->tp_event->prog; - if (prog) { + if (prog && event->tp_event->bpf_prog_owner == event) { event->tp_event->prog = NULL; bpf_prog_put(prog); } diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 00bb0aeea1d0..77977f55dff7 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -405,10 +405,8 @@ static void free_desc(unsigned int irq) * The sysfs entry must be serialized against a concurrent * irq_sysfs_init() as well. */ - mutex_lock(&sparse_irq_lock); kobject_del(&desc->kobj); delete_irq_desc(irq); - mutex_unlock(&sparse_irq_lock); /* * We free the descriptor, masks and stat fields via RCU. That @@ -446,20 +444,15 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node, desc = alloc_desc(start + i, node, flags, mask, owner); if (!desc) goto err; - mutex_lock(&sparse_irq_lock); irq_insert_desc(start + i, desc); irq_sysfs_add(start + i, desc); - mutex_unlock(&sparse_irq_lock); } + bitmap_set(allocated_irqs, start, cnt); return start; err: for (i--; i >= 0; i--) free_desc(start + i); - - mutex_lock(&sparse_irq_lock); - bitmap_clear(allocated_irqs, start, cnt); - mutex_unlock(&sparse_irq_lock); return -ENOMEM; } @@ -558,6 +551,7 @@ static inline int alloc_descs(unsigned int start, unsigned int cnt, int node, desc->owner = owner; } + bitmap_set(allocated_irqs, start, cnt); return start; } @@ -653,10 +647,10 @@ void irq_free_descs(unsigned int from, unsigned int cnt) if (from >= nr_irqs || (from + cnt) > nr_irqs) return; + mutex_lock(&sparse_irq_lock); for (i = 0; i < cnt; i++) free_desc(from + i); - mutex_lock(&sparse_irq_lock); bitmap_clear(allocated_irqs, from, cnt); mutex_unlock(&sparse_irq_lock); } @@ -703,19 +697,15 @@ __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, from, cnt, 0); ret = -EEXIST; if (irq >=0 && start != irq) - goto err; + goto unlock; if (start + cnt > nr_irqs) { ret = irq_expand_nr_irqs(start + cnt); if (ret) - goto err; + goto unlock; } - - bitmap_set(allocated_irqs, start, cnt); - mutex_unlock(&sparse_irq_lock); - return alloc_descs(start, cnt, node, affinity, owner); - -err: + ret = alloc_descs(start, cnt, node, affinity, owner); +unlock: mutex_unlock(&sparse_irq_lock); return ret; } diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 4d7ffc0a0d00..6599c7f3071d 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3260,10 +3260,17 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, if (depth) { hlock = curr->held_locks + depth - 1; if (hlock->class_idx == class_idx && nest_lock) { - if (hlock->references) + if (hlock->references) { + /* + * Check: unsigned int references:12, overflow. + */ + if (DEBUG_LOCKS_WARN_ON(hlock->references == (1 << 12)-1)) + return 0; + hlock->references++; - else + } else { hlock->references = 2; + } return 1; } diff --git a/kernel/power/process.c b/kernel/power/process.c index 68d27ae215a2..9a12c835b6c2 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -19,8 +19,9 @@ #include #include #include +#include -/* +/* * Timeout for stopping processes */ unsigned int __read_mostly freeze_timeout_msecs = 20 * MSEC_PER_SEC; @@ -210,6 +211,8 @@ void thaw_processes(void) __usermodehelper_set_disable_depth(UMH_FREEZING); thaw_workqueues(); + cpuset_wait_for_hotplug(); + read_lock(&tasklist_lock); for_each_process_thread(g, p) { /* No other threads should have PF_SUSPEND_TASK set */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 10f62c6f48e7..d1a02877a42c 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -792,8 +792,13 @@ void rcu_irq_exit(void) long long oldval; struct rcu_dynticks *rdtp; - RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_exit() invoked with irqs enabled!!!"); rdtp = this_cpu_ptr(&rcu_dynticks); + + /* Page faults can happen in NMI handlers, so check... */ + if (READ_ONCE(rdtp->dynticks_nmi_nesting)) + return; + + RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_exit() invoked with irqs enabled!!!"); oldval = rdtp->dynticks_nesting; rdtp->dynticks_nesting--; WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && @@ -930,8 +935,13 @@ void rcu_irq_enter(void) struct rcu_dynticks *rdtp; long long oldval; - RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_enter() invoked with irqs enabled!!!"); rdtp = this_cpu_ptr(&rcu_dynticks); + + /* Page faults can happen in NMI handlers, so check... */ + if (READ_ONCE(rdtp->dynticks_nmi_nesting)) + return; + + RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_enter() invoked with irqs enabled!!!"); oldval = rdtp->dynticks_nesting; rdtp->dynticks_nesting++; WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && diff --git a/kernel/sched/core.c b/kernel/sched/core.c index bb3912e7aa36..c7a317543b62 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1156,6 +1156,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, int ret = 0; rq = task_rq_lock(p, &rf); + update_rq_clock(rq); if (p->flags & PF_KTHREAD) { /* @@ -7551,16 +7552,15 @@ static void cpuset_cpu_active(void) * operation in the resume sequence, just build a single sched * domain, ignoring cpusets. */ - num_cpus_frozen--; - if (likely(num_cpus_frozen)) { - partition_sched_domains(1, NULL, NULL); + partition_sched_domains(1, NULL, NULL); + if (--num_cpus_frozen) return; - } /* * This is the last CPU online operation. So fall through and * restore the original sched domains by considering the * cpuset configurations. */ + cpuset_force_rebuild(); } cpuset_update_active_cpus(true); } diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 12605610b06e..43c4e45683c5 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -587,7 +587,6 @@ static int sugov_init(struct cpufreq_policy *policy) { struct sugov_policy *sg_policy; struct sugov_tunables *tunables; - unsigned int lat; int ret = 0; /* State should be equivalent to EXIT */ @@ -626,12 +625,19 @@ static int sugov_init(struct cpufreq_policy *policy) goto stop_kthread; } - tunables->up_rate_limit_us = LATENCY_MULTIPLIER; - tunables->down_rate_limit_us = LATENCY_MULTIPLIER; - lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC; - if (lat) { - tunables->up_rate_limit_us *= lat; - tunables->down_rate_limit_us *= lat; + if (policy->up_transition_delay_us && policy->down_transition_delay_us) { + tunables->up_rate_limit_us = policy->up_transition_delay_us; + tunables->down_rate_limit_us = policy->down_transition_delay_us; + } else { + unsigned int lat; + + tunables->up_rate_limit_us = LATENCY_MULTIPLIER; + tunables->down_rate_limit_us = LATENCY_MULTIPLIER; + lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC; + if (lat) { + tunables->up_rate_limit_us *= lat; + tunables->down_rate_limit_us *= lat; + } } policy->governor_data = sg_policy; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 0731623be1ee..d10a6d56cccb 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3168,6 +3168,36 @@ static inline int propagate_entity_load_avg(struct sched_entity *se) return 1; } +/* + * Check if we need to update the load and the utilization of a blocked + * group_entity: + */ +static inline bool skip_blocked_update(struct sched_entity *se) +{ + struct cfs_rq *gcfs_rq = group_cfs_rq(se); + + /* + * If sched_entity still have not zero load or utilization, we have to + * decay it: + */ + if (se->avg.load_avg || se->avg.util_avg) + return false; + + /* + * If there is a pending propagation, we have to update the load and + * the utilization of the sched_entity: + */ + if (gcfs_rq->propagate_avg) + return false; + + /* + * Otherwise, the load and the utilization of the sched_entity is + * already zero and there is no pending propagation, so it will be a + * waste of time to try to decay it: + */ + return true; +} + #else /* CONFIG_FAIR_GROUP_SCHED */ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {} @@ -8083,6 +8113,8 @@ static void update_blocked_averages(int cpu) * list_add_leaf_cfs_rq() for details. */ for_each_leaf_cfs_rq(rq, cfs_rq) { + struct sched_entity *se; + /* throttled entities do not contribute to load */ if (throttled_hierarchy(cfs_rq)) continue; @@ -8090,9 +8122,10 @@ static void update_blocked_averages(int cpu) if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true)) update_tg_load_avg(cfs_rq, 0); - /* Propagate pending load changes to the parent */ - if (cfs_rq->tg->se[cpu]) - update_load_avg(cfs_rq->tg->se[cpu], 0); + /* Propagate pending load changes to the parent, if any: */ + se = cfs_rq->tg->se[cpu]; + if (se && !skip_blocked_update(se)) + update_load_avg(se, 0); } raw_spin_unlock_irqrestore(&rq->lock, flags); } diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c index 522f723af576..ab7a897b820a 100644 --- a/kernel/sched/walt.c +++ b/kernel/sched/walt.c @@ -19,6 +19,7 @@ * and Todd Kjos */ +#include #include #include #include @@ -1018,6 +1019,11 @@ static int cpufreq_notifier_policy(struct notifier_block *nb, for_each_cpu(i, cpus) { struct rq *rq = cpu_rq(i); + if (!acpi_disabled && !rq->max_freq) { + pr_warn("max frequency for CPU%d not populated\n", i); + continue; + } + rq->capacity = compute_capacity(i); rq->load_scale_factor = compute_load_scale_factor(i); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 0db7c8a2afe2..af182a6df25b 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -457,14 +457,19 @@ static long seccomp_attach_filter(unsigned int flags, return 0; } +void __get_seccomp_filter(struct seccomp_filter *filter) +{ + /* Reference count is bounded by the number of total processes. */ + atomic_inc(&filter->usage); +} + /* get_seccomp_filter - increments the reference count of the filter on @tsk */ void get_seccomp_filter(struct task_struct *tsk) { struct seccomp_filter *orig = tsk->seccomp.filter; if (!orig) return; - /* Reference count is bounded by the number of total processes. */ - atomic_inc(&orig->usage); + __get_seccomp_filter(orig); } static inline void seccomp_filter_free(struct seccomp_filter *filter) @@ -475,10 +480,8 @@ static inline void seccomp_filter_free(struct seccomp_filter *filter) } } -/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */ -void put_seccomp_filter(struct task_struct *tsk) +static void __put_seccomp_filter(struct seccomp_filter *orig) { - struct seccomp_filter *orig = tsk->seccomp.filter; /* Clean up single-reference branches iteratively. */ while (orig && atomic_dec_and_test(&orig->usage)) { struct seccomp_filter *freeme = orig; @@ -487,6 +490,12 @@ void put_seccomp_filter(struct task_struct *tsk) } } +/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */ +void put_seccomp_filter(struct task_struct *tsk) +{ + __put_seccomp_filter(tsk->seccomp.filter); +} + /** * seccomp_send_sigsys - signals the task to allow in-process syscall emulation * @syscall: syscall number to send to userland @@ -892,13 +901,13 @@ long seccomp_get_filter(struct task_struct *task, unsigned long filter_off, if (!data) goto out; - get_seccomp_filter(task); + __get_seccomp_filter(filter); spin_unlock_irq(&task->sighand->siglock); if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog))) ret = -EFAULT; - put_seccomp_filter(task); + __put_seccomp_filter(filter); return ret; out: diff --git a/kernel/sysctl.c b/kernel/sysctl.c index baf96926ce11..f70b15be12aa 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1255,6 +1255,8 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = timer_migration_handler, + .extra1 = &zero, + .extra2 = &one, }, #endif #ifdef CONFIG_BPF_SYSCALL diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index bb5ec425dfe0..eeb7f2f5698d 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -94,17 +94,15 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = }; static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { + /* Make sure we catch unsupported clockids */ + [0 ... MAX_CLOCKS - 1] = HRTIMER_MAX_CLOCK_BASES, + [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME, [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC, [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME, [CLOCK_TAI] = HRTIMER_BASE_TAI, }; -static inline int hrtimer_clockid_to_base(clockid_t clock_id) -{ - return hrtimer_clock_to_base_table[clock_id]; -} - /* * Functions and macros which are different for UP/SMP systems are kept in a * single place @@ -1112,6 +1110,18 @@ u64 hrtimer_get_next_event(void) } #endif +static inline int hrtimer_clockid_to_base(clockid_t clock_id) +{ + if (likely(clock_id < MAX_CLOCKS)) { + int base = hrtimer_clock_to_base_table[clock_id]; + + if (likely(base != HRTIMER_MAX_CLOCK_BASES)) + return base; + } + WARN(1, "Invalid clockid %d. Using MONOTONIC\n", clock_id); + return HRTIMER_BASE_MONOTONIC; +} + static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode) { diff --git a/kernel/time/timer.c b/kernel/time/timer.c index df445cde8a1e..7d670362891a 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -240,7 +240,7 @@ int timer_migration_handler(struct ctl_table *table, int write, int ret; mutex_lock(&mutex); - ret = proc_dointvec(table, write, buffer, lenp, ppos); + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (!ret && write) timers_update_migration(false); mutex_unlock(&mutex); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index da5768901a0d..5771ce7f08b3 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -160,6 +160,17 @@ config FUNCTION_GRAPH_TRACER address on the current task structure into a stack of calls. +config PREEMPTIRQ_EVENTS + bool "Enable trace events for preempt and irq disable/enable" + select TRACE_IRQFLAGS + depends on DEBUG_PREEMPT || !PROVE_LOCKING + default n + help + Enable tracing of disable and enable events for preemption and irqs. + For tracing preempt disable/enable events, DEBUG_PREEMPT must be + enabled. For tracing irq disable/enable events, PROVE_LOCKING must + be disabled. + config IRQSOFF_TRACER bool "Interrupts-off Latency Tracer" default n diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index b4eaf9c9c610..907c14446dbd 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_TRACING) += trace_printk.o obj-$(CONFIG_TRACING_MAP) += tracing_map.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o +obj-$(CONFIG_PREEMPTIRQ_EVENTS) += trace_irqsoff.o obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6e432ed7d0fe..5b8d7189e147 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2747,13 +2747,14 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) if (!command || !ftrace_enabled) { /* - * If these are per_cpu ops, they still need their - * per_cpu field freed. Since, function tracing is + * If these are dynamic or per_cpu ops, they still + * need their data freed. Since, function tracing is * not currently active, we can just free them * without synchronizing all CPUs. */ - if (ops->flags & FTRACE_OPS_FL_PER_CPU) - per_cpu_ops_free(ops); + if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU)) + goto free_ops; + return 0; } @@ -2808,6 +2809,7 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU)) { schedule_on_each_cpu(ftrace_sync); + free_ops: arch_ftrace_trampoline_free(ops); if (ops->flags & FTRACE_OPS_FL_PER_CPU) @@ -4379,9 +4381,6 @@ static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata; static char ftrace_graph_notrace_buf[FTRACE_FILTER_SIZE] __initdata; static int ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer); -static unsigned long save_global_trampoline; -static unsigned long save_global_flags; - static int __init set_graph_function(char *str) { strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE); @@ -5979,17 +5978,6 @@ void unregister_ftrace_graph(void) unregister_pm_notifier(&ftrace_suspend_notifier); unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); -#ifdef CONFIG_DYNAMIC_FTRACE - /* - * Function graph does not allocate the trampoline, but - * other global_ops do. We need to reset the ALLOC_TRAMP flag - * if one was used. - */ - global_ops.trampoline = save_global_trampoline; - if (save_global_flags & FTRACE_OPS_FL_ALLOC_TRAMP) - global_ops.flags |= FTRACE_OPS_FL_ALLOC_TRAMP; -#endif - out: mutex_unlock(&ftrace_lock); } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e548b32aa4f5..b782c8487c11 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2390,11 +2390,17 @@ static char *get_trace_buf(void) if (!buffer || buffer->nesting >= 4) return NULL; - return &buffer->buffer[buffer->nesting++][0]; + buffer->nesting++; + + /* Interrupts must see nesting incremented before we use the buffer */ + barrier(); + return &buffer->buffer[buffer->nesting][0]; } static void put_trace_buf(void) { + /* Don't let the decrement of nesting leak before this */ + barrier(); this_cpu_dec(trace_percpu_buffer->nesting); } @@ -3609,11 +3615,17 @@ static int tracing_open(struct inode *inode, struct file *file) /* If this file was open for write, then erase contents */ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { int cpu = tracing_get_cpu(inode); + struct trace_buffer *trace_buf = &tr->trace_buffer; + +#ifdef CONFIG_TRACER_MAX_TRACE + if (tr->current_trace->print_max) + trace_buf = &tr->max_buffer; +#endif if (cpu == RING_BUFFER_ALL_CPUS) - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(trace_buf); else - tracing_reset(&tr->trace_buffer, cpu); + tracing_reset(trace_buf, cpu); } if (file->f_mode & FMODE_READ) { @@ -5212,7 +5224,7 @@ static int tracing_wait_pipe(struct file *filp) * * iter->pos will be 0 if we haven't read anything. */ - if (!tracing_is_on() && iter->pos) + if (!tracer_tracing_is_on(iter->tr) && iter->pos) break; mutex_unlock(&iter->mutex); @@ -5748,7 +5760,7 @@ static int tracing_set_clock(struct trace_array *tr, const char *clockstr) tracing_reset_online_cpus(&tr->trace_buffer); #ifdef CONFIG_TRACER_MAX_TRACE - if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer) + if (tr->max_buffer.buffer) ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func); tracing_reset_online_cpus(&tr->max_buffer); #endif diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 03cdff84d026..c180fe513eb2 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -16,6 +16,10 @@ #include "trace.h" +#define CREATE_TRACE_POINTS +#include + +#if defined(CONFIG_IRQSOFF_TRACER) || defined(CONFIG_PREEMPT_TRACER) static struct trace_array *irqsoff_trace __read_mostly; static int tracer_enabled __read_mostly; @@ -450,64 +454,44 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1) #else /* !CONFIG_PROVE_LOCKING */ -/* - * Stubs: - */ - -void trace_softirqs_on(unsigned long ip) -{ -} - -void trace_softirqs_off(unsigned long ip) -{ -} - -inline void print_irqtrace_events(struct task_struct *curr) -{ -} - /* * We are only interested in hardirq on/off events: */ -void trace_hardirqs_on(void) +static inline void tracer_hardirqs_on(void) { if (!preempt_trace() && irq_trace()) stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } -EXPORT_SYMBOL(trace_hardirqs_on); -void trace_hardirqs_off(void) +static inline void tracer_hardirqs_off(void) { if (!preempt_trace() && irq_trace()) start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } -EXPORT_SYMBOL(trace_hardirqs_off); -__visible void trace_hardirqs_on_caller(unsigned long caller_addr) +static inline void tracer_hardirqs_on_caller(unsigned long caller_addr) { if (!preempt_trace() && irq_trace()) stop_critical_timing(CALLER_ADDR0, caller_addr); } -EXPORT_SYMBOL(trace_hardirqs_on_caller); -__visible void trace_hardirqs_off_caller(unsigned long caller_addr) +static inline void tracer_hardirqs_off_caller(unsigned long caller_addr) { if (!preempt_trace() && irq_trace()) start_critical_timing(CALLER_ADDR0, caller_addr); } -EXPORT_SYMBOL(trace_hardirqs_off_caller); #endif /* CONFIG_PROVE_LOCKING */ #endif /* CONFIG_IRQSOFF_TRACER */ #ifdef CONFIG_PREEMPT_TRACER -void trace_preempt_on(unsigned long a0, unsigned long a1) +static inline void tracer_preempt_on(unsigned long a0, unsigned long a1) { if (preempt_trace() && !irq_trace()) stop_critical_timing(a0, a1); } -void trace_preempt_off(unsigned long a0, unsigned long a1) +static inline void tracer_preempt_off(unsigned long a0, unsigned long a1) { if (preempt_trace() && !irq_trace()) start_critical_timing(a0, a1); @@ -769,3 +753,100 @@ __init static int init_irqsoff_tracer(void) return 0; } core_initcall(init_irqsoff_tracer); +#endif /* IRQSOFF_TRACER || PREEMPTOFF_TRACER */ + +#ifndef CONFIG_IRQSOFF_TRACER +static inline void tracer_hardirqs_on(void) { } +static inline void tracer_hardirqs_off(void) { } +static inline void tracer_hardirqs_on_caller(unsigned long caller_addr) { } +static inline void tracer_hardirqs_off_caller(unsigned long caller_addr) { } +#endif + +#ifndef CONFIG_PREEMPT_TRACER +static inline void tracer_preempt_on(unsigned long a0, unsigned long a1) { } +static inline void tracer_preempt_off(unsigned long a0, unsigned long a1) { } +#endif + +/* Per-cpu variable to prevent redundant calls when IRQs already off */ +static DEFINE_PER_CPU(int, tracing_irq_cpu); + +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PROVE_LOCKING) +void trace_hardirqs_on(void) +{ + if (!this_cpu_read(tracing_irq_cpu)) + return; + + trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1); + tracer_hardirqs_on(); + + this_cpu_write(tracing_irq_cpu, 0); +} +EXPORT_SYMBOL(trace_hardirqs_on); + +void trace_hardirqs_off(void) +{ + if (this_cpu_read(tracing_irq_cpu)) + return; + + this_cpu_write(tracing_irq_cpu, 1); + + trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1); + tracer_hardirqs_off(); +} +EXPORT_SYMBOL(trace_hardirqs_off); + +__visible void trace_hardirqs_on_caller(unsigned long caller_addr) +{ + if (!this_cpu_read(tracing_irq_cpu)) + return; + + trace_irq_enable_rcuidle(CALLER_ADDR0, caller_addr); + tracer_hardirqs_on_caller(caller_addr); + + this_cpu_write(tracing_irq_cpu, 0); +} +EXPORT_SYMBOL(trace_hardirqs_on_caller); + +__visible void trace_hardirqs_off_caller(unsigned long caller_addr) +{ + if (this_cpu_read(tracing_irq_cpu)) + return; + + this_cpu_write(tracing_irq_cpu, 1); + + trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr); + tracer_hardirqs_off_caller(caller_addr); +} +EXPORT_SYMBOL(trace_hardirqs_off_caller); + +/* + * Stubs: + */ + +void trace_softirqs_on(unsigned long ip) +{ +} + +void trace_softirqs_off(unsigned long ip) +{ +} + +inline void print_irqtrace_events(struct task_struct *curr) +{ +} +#endif + +#if defined(CONFIG_PREEMPT_TRACER) || \ + (defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_PREEMPTIRQ_EVENTS)) +void trace_preempt_on(unsigned long a0, unsigned long a1) +{ + trace_preempt_enable_rcuidle(a0, a1); + tracer_preempt_on(a0, a1); +} + +void trace_preempt_off(unsigned long a0, unsigned long a1) +{ + trace_preempt_disable_rcuidle(a0, a1); + tracer_preempt_off(a0, a1); +} +#endif diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index b0f86ea77881..ca70d11b8aa7 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -272,7 +272,7 @@ static int trace_selftest_ops(struct trace_array *tr, int cnt) goto out_free; if (cnt > 1) { if (trace_selftest_test_global_cnt == 0) - goto out; + goto out_free; } if (trace_selftest_test_dyn_cnt == 0) goto out_free; diff --git a/lib/digsig.c b/lib/digsig.c index 55b8b2f41a9e..6ba6fcd92dd1 100644 --- a/lib/digsig.c +++ b/lib/digsig.c @@ -85,7 +85,13 @@ static int digsig_verify_rsa(struct key *key, struct pubkey_hdr *pkh; down_read(&key->sem); - ukp = user_key_payload(key); + ukp = user_key_payload_locked(key); + + if (!ukp) { + /* key was revoked before we acquired its semaphore */ + err = -EKEYREVOKED; + goto err1; + } if (ukp->datalen < sizeof(*pkh)) goto err1; diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 08f8043cac61..d01f47135239 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -48,7 +48,9 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func) if (time_is_before_jiffies(rs->begin + rs->interval)) { if (rs->missed) { if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) { - pr_warn("%s: %d callbacks suppressed\n", func, rs->missed); + printk_deferred(KERN_WARNING + "%s: %d callbacks suppressed\n", + func, rs->missed); rs->missed = 0; } } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 47559cc0cdcc..2a800c4a39bd 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -462,6 +462,8 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) struct mem_cgroup_tree_per_node *mctz; mctz = soft_limit_tree_from_page(page); + if (!mctz) + return; /* * Necessary to update all ancestors when hierarchy is used. * because their event counter is not touched. @@ -499,7 +501,8 @@ static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg) for_each_node(nid) { mz = mem_cgroup_nodeinfo(memcg, nid); mctz = soft_limit_tree_node(nid); - mem_cgroup_remove_exceeded(mz, mctz); + if (mctz) + mem_cgroup_remove_exceeded(mz, mctz); } } @@ -2565,7 +2568,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, * is empty. Do it lockless to prevent lock bouncing. Races * are acceptable as soft limit is best effort anyway. */ - if (RB_EMPTY_ROOT(&mctz->rb_root)) + if (!mctz || RB_EMPTY_ROOT(&mctz->rb_root)) return 0; /* diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index ede137345a99..c9f715b2917f 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -179,7 +179,7 @@ static void release_memory_resource(struct resource *res) void get_page_bootmem(unsigned long info, struct page *page, unsigned long type) { - page->lru.next = (struct list_head *) type; + page->freelist = (void *)type; SetPagePrivate(page); set_page_private(page, info); page_ref_inc(page); @@ -189,11 +189,12 @@ void put_page_bootmem(struct page *page) { unsigned long type; - type = (unsigned long) page->lru.next; + type = (unsigned long) page->freelist; BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE || type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE); if (page_ref_dec_return(page) == 1) { + page->freelist = NULL; ClearPagePrivate(page); set_page_private(page, 0); INIT_LIST_HEAD(&page->lru); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index ec9f11d4f094..d631d251c150 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include "internal.h" @@ -490,6 +491,21 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) goto unlock_oom; } + /* + * If the mm has notifiers then we would need to invalidate them around + * unmap_page_range and that is risky because notifiers can sleep and + * what they do is basically undeterministic. So let's have a short + * sleep to give the oom victim some more time. + * TODO: we really want to get rid of this ugly hack and make sure that + * notifiers cannot block for unbounded amount of time and add + * mmu_notifier_invalidate_range_{start,end} around unmap_page_range + */ + if (mm_has_notifiers(mm)) { + up_read(&mm->mmap_sem); + schedule_timeout_idle(HZ); + goto unlock_oom; + } + /* * increase mm_users only after we know we will reap something so * that the mmput_async is called only when we have reaped something diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2abf8d5f0ad4..7064aae8ded7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2100,13 +2100,25 @@ static void unreserve_highatomic_pageblock(const struct alloc_context *ac) continue; /* - * It should never happen but changes to locking could - * inadvertently allow a per-cpu drain to add pages - * to MIGRATE_HIGHATOMIC while unreserving so be safe - * and watch for underflows. + * In page freeing path, migratetype change is racy so + * we can counter several free pages in a pageblock + * in this loop althoug we changed the pageblock type + * from highatomic to ac->migratetype. So we should + * adjust the count once. */ - zone->nr_reserved_highatomic -= min(pageblock_nr_pages, - zone->nr_reserved_highatomic); + if (get_pageblock_migratetype(page) == + MIGRATE_HIGHATOMIC) { + /* + * It should never happen but changes to + * locking could inadvertently allow a per-cpu + * drain to add pages to MIGRATE_HIGHATOMIC + * while unreserving so be safe and watch for + * underflows. + */ + zone->nr_reserved_highatomic -= min( + pageblock_nr_pages, + zone->nr_reserved_highatomic); + } /* * Convert to ac->migratetype and avoid the normal diff --git a/mm/slab_common.c b/mm/slab_common.c index 5d2f24fbafc5..622f6b6ae844 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -255,7 +255,7 @@ struct kmem_cache *find_mergeable(size_t size, size_t align, { struct kmem_cache *s; - if (slab_nomerge || (flags & SLAB_NEVER_MERGE)) + if (slab_nomerge) return NULL; if (ctor) @@ -266,6 +266,9 @@ struct kmem_cache *find_mergeable(size_t size, size_t align, size = ALIGN(size, align); flags = kmem_cache_flags(size, flags, name, NULL); + if (flags & SLAB_NEVER_MERGE) + return NULL; + list_for_each_entry_reverse(s, &slab_caches, list) { if (slab_unmergeable(s)) continue; diff --git a/mm/sparse.c b/mm/sparse.c index 1e168bf2779a..8c4c82e358e6 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -662,7 +662,7 @@ static void free_map_bootmem(struct page *memmap) >> PAGE_SHIFT; for (i = 0; i < nr_pages; i++, page++) { - magic = (unsigned long) page->lru.next; + magic = (unsigned long) page->freelist; BUG_ON(magic == NODE_INFO); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 7625ec8458de..5d4006e589cb 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1098,11 +1098,14 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev, spin_unlock_bh(&br->lock); } - err = br_changelink(dev, tb, data); + err = register_netdevice(dev); if (err) return err; - return register_netdevice(dev); + err = br_changelink(dev, tb, data); + if (err) + unregister_netdevice(dev); + return err; } static size_t br_get_size(const struct net_device *brdev) diff --git a/net/compat.c b/net/compat.c index 1cd2ec046164..a96fd2f3507b 100644 --- a/net/compat.c +++ b/net/compat.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -781,14 +782,24 @@ COMPAT_SYSCALL_DEFINE5(recvmmsg, int, fd, struct compat_mmsghdr __user *, mmsg, COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args) { - int ret; - u32 a[6]; + u32 a[AUDITSC_ARGS]; + unsigned int len; u32 a0, a1; + int ret; if (call < SYS_SOCKET || call > SYS_SENDMMSG) return -EINVAL; - if (copy_from_user(a, args, nas[call])) + len = nas[call]; + if (len > sizeof(a)) + return -EINVAL; + + if (copy_from_user(a, args, len)) return -EFAULT; + + ret = audit_socketcall_compat(len / sizeof(a[0]), a); + if (ret) + return ret; + a0 = a[0]; a1 = a[1]; diff --git a/net/core/dev.c b/net/core/dev.c index ba7b8121a414..7f2caad46a3d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2355,6 +2355,9 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) { unsigned long flags; + if (unlikely(!skb)) + return; + if (likely(atomic_read(&skb->users) == 1)) { smp_rmb(); atomic_set(&skb->users, 0); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index b6791d94841d..31c4041f7586 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -425,6 +425,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh) if (tb[FRA_TUN_ID]) rule->tun_id = nla_get_be64(tb[FRA_TUN_ID]); + err = -EINVAL; if (tb[FRA_L3MDEV]) { #ifdef CONFIG_NET_L3_MASTER_DEV rule->l3mdev = nla_get_u8(tb[FRA_L3MDEV]); @@ -446,7 +447,6 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh) else rule->suppress_ifgroup = -1; - err = -EINVAL; if (tb[FRA_GOTO]) { if (rule->action != FR_ACT_GOTO) goto errout_free; @@ -576,8 +576,10 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) if (tb[FRA_UID_RANGE]) { range = nla_get_kuid_range(tb); - if (!uid_range_set(&range)) + if (!uid_range_set(&range)) { + err = -EINVAL; goto errout; + } } else { range = fib_kuid_range_unset; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4d2629781e8b..c2339b865164 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3758,6 +3758,9 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, return -EMSGSIZE; ifsm = nlmsg_data(nlh); + ifsm->family = PF_UNSPEC; + ifsm->pad1 = 0; + ifsm->pad2 = 0; ifsm->ifindex = dev->ifindex; ifsm->filter_mask = filter_mask; diff --git a/net/core/sock.c b/net/core/sock.c index f07eaea4600f..c6f42ee363e2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1501,6 +1501,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) sock_copy(newsk, sk); + newsk->sk_prot_creator = sk->sk_prot; + /* SANITY */ if (likely(newsk->sk_net_refcnt)) get_net(sock_net(newsk)); diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index 8737412c7b27..e1d4d898a007 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -224,7 +224,7 @@ static int dns_resolver_match_preparse(struct key_match_data *match_data) static void dns_resolver_describe(const struct key *key, struct seq_file *m) { seq_puts(m, key->description); - if (key_is_instantiated(key)) { + if (key_is_positive(key)) { int err = PTR_ERR(key->payload.data[dns_key_error]); if (err) diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index ecc28cff08ab..d502c94b1a82 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -70,7 +70,7 @@ int dns_query(const char *type, const char *name, size_t namelen, const char *options, char **_result, time64_t *_expiry) { struct key *rkey; - const struct user_key_payload *upayload; + struct user_key_payload *upayload; const struct cred *saved_cred; size_t typelen, desclen; char *desc, *cp; @@ -141,7 +141,7 @@ int dns_query(const char *type, const char *name, size_t namelen, if (ret) goto put; - upayload = user_key_payload(rkey); + upayload = user_key_payload_locked(rkey); len = upayload->datalen; ret = -ENOMEM; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 079d76bc204c..5000e6f20f4a 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1269,26 +1269,32 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, p->old_duplex = -1; ds->ports[port].netdev = slave_dev; - ret = register_netdev(slave_dev); - if (ret) { - netdev_err(master, "error %d registering interface %s\n", - ret, slave_dev->name); - ds->ports[port].netdev = NULL; - free_netdev(slave_dev); - return ret; - } netif_carrier_off(slave_dev); ret = dsa_slave_phy_setup(p, slave_dev); if (ret) { netdev_err(master, "error %d setting up slave phy\n", ret); - unregister_netdev(slave_dev); - free_netdev(slave_dev); - return ret; + goto out_free; + } + + ret = register_netdev(slave_dev); + if (ret) { + netdev_err(master, "error %d registering interface %s\n", + ret, slave_dev->name); + goto out_phy; } return 0; + +out_phy: + phy_disconnect(p->phy); + if (of_phy_is_fixed_link(ds->ports[port].dn)) + of_phy_deregister_fixed_link(ds->ports[port].dn); +out_free: + free_netdev(slave_dev); + ds->ports[port].netdev = NULL; + return ret; } void dsa_slave_destroy(struct net_device *slave_dev) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c8409ca0abd2..2ec005c6217c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -319,7 +319,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, int ret, no_addr; struct fib_result res; struct flowi4 fl4; - struct net *net; + struct net *net = dev_net(dev); bool dev_match; fl4.flowi4_oif = 0; @@ -332,6 +332,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.flowi4_tun_key.tun_id = 0; fl4.flowi4_flags = 0; + fl4.flowi4_uid = sock_net_uid(net, NULL); no_addr = idev->ifa_list == NULL; @@ -339,13 +340,12 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, trace_fib_validate_source(dev, &fl4); - net = dev_net(dev); if (fib_lookup(net, &fl4, &res, 0)) goto last_resort; if (res.type != RTN_UNICAST && (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev))) goto e_inval; - if (!rpf && !fib_num_tclassid_users(dev_net(dev)) && + if (!rpf && !fib_num_tclassid_users(net) && (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) goto last_resort; fib_combine_itag(itag, &res); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 5d7944f394d9..b120b9b11402 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -168,6 +168,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, struct ip_tunnel_parm *parms = &tunnel->parms; struct dst_entry *dst = skb_dst(skb); struct net_device *tdev; /* Device to other host */ + int pkt_len = skb->len; int err; int mtu; @@ -229,7 +230,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, err = dst_output(tunnel->net, skb->sk, skb); if (net_xmit_eval(err) == 0) - err = skb->len; + err = pkt_len; iptunnel_xmit_stats(dev, err); return NETDEV_TX_OK; diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index c9b52c361da2..5a8f7c360887 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1304,6 +1304,7 @@ static int __init nf_nat_snmp_basic_init(void) static void __exit nf_nat_snmp_basic_fini(void) { RCU_INIT_POINTER(nf_nat_snmp_hook, NULL); + synchronize_rcu(); nf_conntrack_helper_unregister(&snmp_trap_helper); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 80a7a8828876..b21bf597d39e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1860,6 +1860,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.flowi4_flags = 0; fl4.daddr = daddr; fl4.saddr = saddr; + fl4.flowi4_uid = sock_net_uid(net, NULL); err = fib_lookup(net, &fl4, &res, 0); if (err != 0) { if (!IN_DEV_FORWARD(in_dev)) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a835716ee713..dd08d16dad2c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -914,6 +914,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, struct tcp_skb_cb *tcb; struct tcp_out_options opts; unsigned int tcp_options_size, tcp_header_size; + struct sk_buff *oskb = NULL; struct tcp_md5sig_key *md5; struct tcphdr *th; int err; @@ -922,11 +923,9 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, tp = tcp_sk(sk); if (clone_it) { - skb_mstamp_get(&skb->skb_mstamp); TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq - tp->snd_una; - tcp_rate_skb_sent(sk, skb); - + oskb = skb; if (unlikely(skb_cloned(skb))) skb = pskb_copy(skb, gfp_mask); else @@ -934,6 +933,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (unlikely(!skb)) return -ENOBUFS; } + skb_mstamp_get(&skb->skb_mstamp); inet = inet_sk(sk); tcb = TCP_SKB_CB(skb); @@ -1035,12 +1035,15 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); - if (likely(err <= 0)) - return err; - - tcp_enter_cwr(sk); - - return net_xmit_eval(err); + if (unlikely(err > 0)) { + tcp_enter_cwr(sk); + err = net_xmit_eval(err); + } + if (!err && oskb) { + skb_mstamp_get(&oskb->skb_mstamp); + tcp_rate_skb_sent(sk, oskb); + } + return err; } /* This routine just queues the buffer for sending. @@ -2709,10 +2712,11 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) skb_headroom(skb) >= 0xFFFF)) { struct sk_buff *nskb; - skb_mstamp_get(&skb->skb_mstamp); nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : -ENOBUFS; + if (!err) + skb_mstamp_get(&skb->skb_mstamp); } else { err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); } @@ -3325,6 +3329,10 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) goto done; } + /* data was not sent, this is our new send_head */ + sk->sk_send_head = syn_data; + tp->packets_out -= tcp_skb_pcount(syn_data); + fallback: /* Send a regular SYN with Fast Open cookie request option */ if (fo->cookie.len > 0) @@ -3374,6 +3382,11 @@ int tcp_connect(struct sock *sk) */ tp->snd_nxt = tp->write_seq; tp->pushed_seq = tp->write_seq; + buff = tcp_send_head(sk); + if (unlikely(buff)) { + tp->snd_nxt = TCP_SKB_CB(buff)->seq; + tp->pushed_seq = TCP_SKB_CB(buff)->seq; + } TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS); /* Timer for repeating the SYN until an answer. */ diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 6de016f80f17..0932c85b42af 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -29,6 +29,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, u16 mac_len = skb->mac_len; int udp_offset, outer_hlen; __wsum partial; + bool need_ipsec; if (unlikely(!pskb_may_pull(skb, tnl_hlen))) goto out; @@ -62,8 +63,10 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); + need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb)); /* Try to offload checksum if possible */ offload_csum = !!(need_csum && + !need_ipsec && (skb->dev->features & (is_ipv6 ? (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM) : (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM)))); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 6a7ff6957535..7f9a8dfcfe18 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -22,7 +22,8 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo; static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, int tos, int oif, const xfrm_address_t *saddr, - const xfrm_address_t *daddr) + const xfrm_address_t *daddr, + u32 mark) { struct rtable *rt; @@ -30,6 +31,7 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, fl4->daddr = daddr->a4; fl4->flowi4_tos = tos; fl4->flowi4_oif = l3mdev_master_ifindex_by_index(net, oif); + fl4->flowi4_mark = mark; if (saddr) fl4->saddr = saddr->a4; @@ -44,20 +46,22 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, - const xfrm_address_t *daddr) + const xfrm_address_t *daddr, + u32 mark) { struct flowi4 fl4; - return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr); + return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr, mark); } static int xfrm4_get_saddr(struct net *net, int oif, - xfrm_address_t *saddr, xfrm_address_t *daddr) + xfrm_address_t *saddr, xfrm_address_t *daddr, + u32 mark) { struct dst_entry *dst; struct flowi4 fl4; - dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr); + dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr, mark); if (IS_ERR(dst)) return -EHOSTUNREACH; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5da864997495..7ba9a6ebf19d 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -914,6 +914,7 @@ add: fn->fn_flags |= RTN_RTINFO; } nsiblings = iter->rt6i_nsiblings; + iter->rt6i_node = NULL; fib6_purge_rt(iter, fn, info->nl_net); if (fn->rr_ptr == iter) fn->rr_ptr = NULL; @@ -928,6 +929,7 @@ add: break; if (rt6_qualify_for_ecmp(iter)) { *ins = iter->dst.rt6_next; + iter->rt6i_node = NULL; fib6_purge_rt(iter, fn, info->nl_net); if (fn->rr_ptr == iter) fn->rr_ptr = NULL; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index ae87b9a64684..48e6e755eab9 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -940,24 +940,25 @@ done: } static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, - const void *daddr, const void *saddr, unsigned int len) + unsigned short type, const void *daddr, + const void *saddr, unsigned int len) { struct ip6_tnl *t = netdev_priv(dev); - struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen); - __be16 *p = (__be16 *)(ipv6h+1); + struct ipv6hdr *ipv6h; + __be16 *p; - ip6_flow_hdr(ipv6h, 0, - ip6_make_flowlabel(dev_net(dev), skb, - t->fl.u.ip6.flowlabel, true, - &t->fl.u.ip6)); + ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen + sizeof(*ipv6h)); + ip6_flow_hdr(ipv6h, 0, ip6_make_flowlabel(dev_net(dev), skb, + t->fl.u.ip6.flowlabel, + true, &t->fl.u.ip6)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = NEXTHDR_GRE; ipv6h->saddr = t->parms.laddr; ipv6h->daddr = t->parms.raddr; - p[0] = t->parms.o_flags; - p[1] = htons(type); + p = (__be16 *)(ipv6h + 1); + p[0] = t->parms.o_flags; + p[1] = htons(type); /* * Set the source hardware address. @@ -1301,6 +1302,7 @@ static void ip6gre_tap_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + netif_keep_dst(dev); } static bool ip6gre_netlink_encap_parms(struct nlattr *data[], diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index b44e9f57a765..64aefc2d13fa 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1042,6 +1042,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, struct dst_entry *dst = NULL, *ndst = NULL; struct net_device *tdev; int mtu; + unsigned int eth_hlen = t->dev->type == ARPHRD_ETHER ? ETH_HLEN : 0; unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen; unsigned int max_headroom = psh_hlen; bool use_cache = false; @@ -1120,7 +1121,7 @@ route_lookup: t->parms.name); goto tx_err_dst_release; } - mtu = dst_mtu(dst) - psh_hlen - t->tun_hlen; + mtu = dst_mtu(dst) - eth_hlen - psh_hlen - t->tun_hlen; if (encap_limit >= 0) { max_headroom += 8; mtu -= 8; @@ -1129,7 +1130,7 @@ route_lookup: mtu = IPV6_MIN_MTU; if (skb_dst(skb) && !t->parms.collect_md) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - if (skb->len - t->tun_hlen > mtu && !skb_is_gso(skb)) { + if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { *pmtu = mtu; err = -EMSGSIZE; goto tx_err_dst_release; @@ -2235,6 +2236,9 @@ static int __init ip6_tunnel_init(void) { int err; + if (!ipv6_mod_enabled()) + return -EOPNOTSUPP; + err = register_pernet_device(&ip6_tnl_net_ops); if (err < 0) goto out_pernet; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index bbeedff32d68..da64b20ded6b 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -445,6 +445,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) struct dst_entry *dst = skb_dst(skb); struct net_device *tdev; struct xfrm_state *x; + int pkt_len = skb->len; int err = -1; int mtu; @@ -498,7 +499,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += skb->len; + tstats->tx_bytes += pkt_len; tstats->tx_packets++; u64_stats_update_end(&tstats->syncp); } else { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c925fd9b3fa3..c43ef0cdf8cb 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -915,6 +915,7 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, */ offset = skb_transport_offset(skb); skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); + csum = skb->csum; skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index e0f71c01d728..4003b2806a68 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -29,7 +29,8 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo; static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, - const xfrm_address_t *daddr) + const xfrm_address_t *daddr, + u32 mark) { struct flowi6 fl6; struct dst_entry *dst; @@ -38,6 +39,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = l3mdev_master_ifindex_by_index(net, oif); fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF; + fl6.flowi6_mark = mark; memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr)); if (saddr) memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr)); @@ -54,12 +56,13 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, } static int xfrm6_get_saddr(struct net *net, int oif, - xfrm_address_t *saddr, xfrm_address_t *daddr) + xfrm_address_t *saddr, xfrm_address_t *daddr, + u32 mark) { struct dst_entry *dst; struct net_device *dev; - dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr); + dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr, mark); if (IS_ERR(dst)) return -EHOSTUNREACH; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 3bce65183c95..b06acd0f400d 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1415,6 +1415,9 @@ static void l2tp_tunnel_del_work(struct work_struct *work) struct sock *sk = NULL; tunnel = container_of(work, struct l2tp_tunnel, del_work); + + l2tp_tunnel_closeall(tunnel); + sk = l2tp_tunnel_sock_lookup(tunnel); if (!sk) goto out; @@ -1734,15 +1737,12 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create); /* This function is used by the netlink TUNNEL_DELETE command. */ -int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) +void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) { - l2tp_tunnel_inc_refcount(tunnel); - l2tp_tunnel_closeall(tunnel); - if (false == queue_work(l2tp_wq, &tunnel->del_work)) { - l2tp_tunnel_dec_refcount(tunnel); - return 1; + if (!test_and_set_bit(0, &tunnel->dead)) { + l2tp_tunnel_inc_refcount(tunnel); + queue_work(l2tp_wq, &tunnel->del_work); } - return 0; } EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 0095012509ac..42419f1c24cf 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -169,6 +169,9 @@ struct l2tp_tunnel_cfg { struct l2tp_tunnel { int magic; /* Should be L2TP_TUNNEL_MAGIC */ + + unsigned long dead; + struct rcu_head rcu; rwlock_t hlist_lock; /* protect session_hlist */ struct hlist_head session_hlist[L2TP_HASH_SIZE]; @@ -257,7 +260,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp); void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); -int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); +void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 37bec0f864b7..a7aa54f45e19 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -791,6 +791,7 @@ static int ieee80211_open(struct net_device *dev) static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_down) { + struct ieee80211_sub_if_data *txq_sdata = sdata; struct ieee80211_local *local = sdata->local; struct fq *fq = &local->fq; unsigned long flags; @@ -931,6 +932,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: + txq_sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, u.ap); + mutex_lock(&local->mtx); list_del(&sdata->u.vlan.list); mutex_unlock(&local->mtx); @@ -1001,8 +1005,17 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, } spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); - if (sdata->vif.txq) { - struct txq_info *txqi = to_txq_info(sdata->vif.txq); + if (txq_sdata->vif.txq) { + struct txq_info *txqi = to_txq_info(txq_sdata->vif.txq); + + /* + * FIXME FIXME + * + * We really shouldn't purge the *entire* txqi since that + * contains frames for the other AP_VLANs (and possibly + * the AP itself) as well, but there's no API in FQ now + * to be able to filter. + */ spin_lock_bh(&fq->lock); ieee80211_txq_purge(local, txqi); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index eede5c6db8d5..30bba53c2992 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -707,6 +707,8 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, if (!cookie) return -ENOENT; + flush_work(&local->hw_roc_start); + mutex_lock(&local->mtx); list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { if (!mgmt_tx && roc->cookie != cookie) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index b2c823ffad74..348700b424ea 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -688,7 +688,7 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending) } /* No need to do anything if the driver does all */ - if (ieee80211_hw_check(&local->hw, AP_LINK_PS)) + if (ieee80211_hw_check(&local->hw, AP_LINK_PS) && !local->ops->set_tim) return; if (sta->dead) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index dd190ff3daea..274c564bd9af 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1277,11 +1277,6 @@ static void ieee80211_set_skb_enqueue_time(struct sk_buff *skb) IEEE80211_SKB_CB(skb)->control.enqueue_time = codel_get_time(); } -static void ieee80211_set_skb_vif(struct sk_buff *skb, struct txq_info *txqi) -{ - IEEE80211_SKB_CB(skb)->control.vif = txqi->txq.vif; -} - static u32 codel_skb_len_func(const struct sk_buff *skb) { return skb->len; @@ -3388,6 +3383,7 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, struct ieee80211_tx_info *info; struct ieee80211_tx_data tx; ieee80211_tx_result r; + struct ieee80211_vif *vif; spin_lock_bh(&fq->lock); @@ -3404,8 +3400,6 @@ begin: if (!skb) goto out; - ieee80211_set_skb_vif(skb, txqi); - hdr = (struct ieee80211_hdr *)skb->data; info = IEEE80211_SKB_CB(skb); @@ -3462,6 +3456,34 @@ begin: } } + switch (tx.sdata->vif.type) { + case NL80211_IFTYPE_MONITOR: + if (tx.sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) { + vif = &tx.sdata->vif; + break; + } + tx.sdata = rcu_dereference(local->monitor_sdata); + if (tx.sdata) { + vif = &tx.sdata->vif; + info->hw_queue = + vif->hw_queue[skb_get_queue_mapping(skb)]; + } else if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) { + ieee80211_free_txskb(&local->hw, skb); + goto begin; + } else { + vif = NULL; + } + break; + case NL80211_IFTYPE_AP_VLAN: + tx.sdata = container_of(tx.sdata->bss, + struct ieee80211_sub_if_data, u.ap); + /* fall through */ + default: + vif = &tx.sdata->vif; + break; + } + + IEEE80211_SKB_CB(skb)->control.vif = vif; out: spin_unlock_bh(&fq->lock); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 6bd150882ba4..ed9ce7c63252 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -95,19 +95,26 @@ static struct conntrack_gc_work conntrack_gc_work; void nf_conntrack_lock(spinlock_t *lock) __acquires(lock) { + /* 1) Acquire the lock */ spin_lock(lock); - while (unlikely(nf_conntrack_locks_all)) { - spin_unlock(lock); - /* - * Order the 'nf_conntrack_locks_all' load vs. the - * spin_unlock_wait() loads below, to ensure - * that 'nf_conntrack_locks_all_lock' is indeed held: - */ - smp_rmb(); /* spin_lock(&nf_conntrack_locks_all_lock) */ - spin_unlock_wait(&nf_conntrack_locks_all_lock); - spin_lock(lock); - } + /* 2) read nf_conntrack_locks_all, with ACQUIRE semantics + * It pairs with the smp_store_release() in nf_conntrack_all_unlock() + */ + if (likely(smp_load_acquire(&nf_conntrack_locks_all) == false)) + return; + + /* fast path failed, unlock */ + spin_unlock(lock); + + /* Slow path 1) get global lock */ + spin_lock(&nf_conntrack_locks_all_lock); + + /* Slow path 2) get the lock we want */ + spin_lock(lock); + + /* Slow path 3) release the global lock */ + spin_unlock(&nf_conntrack_locks_all_lock); } EXPORT_SYMBOL_GPL(nf_conntrack_lock); @@ -148,28 +155,27 @@ static void nf_conntrack_all_lock(void) int i; spin_lock(&nf_conntrack_locks_all_lock); + nf_conntrack_locks_all = true; - /* - * Order the above store of 'nf_conntrack_locks_all' against - * the spin_unlock_wait() loads below, such that if - * nf_conntrack_lock() observes 'nf_conntrack_locks_all' - * we must observe nf_conntrack_locks[] held: - */ - smp_mb(); /* spin_lock(&nf_conntrack_locks_all_lock) */ - for (i = 0; i < CONNTRACK_LOCKS; i++) { - spin_unlock_wait(&nf_conntrack_locks[i]); + spin_lock(&nf_conntrack_locks[i]); + + /* This spin_unlock provides the "release" to ensure that + * nf_conntrack_locks_all==true is visible to everyone that + * acquired spin_lock(&nf_conntrack_locks[]). + */ + spin_unlock(&nf_conntrack_locks[i]); } } static void nf_conntrack_all_unlock(void) { - /* - * All prior stores must be complete before we clear + /* All prior stores must be complete before we clear * 'nf_conntrack_locks_all'. Otherwise nf_conntrack_lock() * might observe the false value but not the entire - * critical section: + * critical section. + * It pairs with the smp_load_acquire() in nf_conntrack_lock() */ smp_store_release(&nf_conntrack_locks_all, false); spin_unlock(&nf_conntrack_locks_all_lock); diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index da9df2d56e66..22fc32143e9c 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -290,6 +290,7 @@ void nf_conntrack_unregister_notifier(struct net *net, BUG_ON(notify != new); RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); + /* synchronize_rcu() is called from ctnetlink_exit. */ } EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); @@ -326,6 +327,7 @@ void nf_ct_expect_unregister_notifier(struct net *net, BUG_ON(notify != new); RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); + /* synchronize_rcu() is called from ctnetlink_exit. */ } EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index f8dbacf66795..0d6c72d6b9ba 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -411,7 +411,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) struct net *net = nf_ct_exp_net(expect); struct hlist_node *next; unsigned int h; - int ret = 1; + int ret = 0; if (!master_help) { ret = -ESHUTDOWN; @@ -461,7 +461,7 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, spin_lock_bh(&nf_conntrack_expect_lock); ret = __nf_ct_expect_check(expect); - if (ret <= 0) + if (ret < 0) goto out; ret = nf_ct_expect_insert(expect); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 04111c1c3988..d5caed5bcfb1 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3413,6 +3413,7 @@ static void __exit ctnetlink_exit(void) #ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT RCU_INIT_POINTER(nfnl_ct_hook, NULL); #endif + synchronize_rcu(); } module_init(ctnetlink_init); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index dde64c4565d2..2916f4815c9c 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -892,6 +892,8 @@ static void __exit nf_nat_cleanup(void) #ifdef CONFIG_XFRM RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL); #endif + synchronize_rcu(); + for (i = 0; i < NFPROTO_NUMPROTO; i++) kfree(nf_nat_l4protos[i]); diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 3b79f34b5095..b1fcfa08f0b4 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -161,6 +161,7 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, int i, ret; struct nf_conntrack_expect_policy *expect_policy; struct nlattr *tb[NFCTH_POLICY_SET_MAX+1]; + unsigned int class_max; ret = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, nfnl_cthelper_expect_policy_set); @@ -170,19 +171,18 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, if (!tb[NFCTH_POLICY_SET_NUM]) return -EINVAL; - helper->expect_class_max = - ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); - - if (helper->expect_class_max != 0 && - helper->expect_class_max > NF_CT_MAX_EXPECT_CLASSES) + class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); + if (class_max == 0) + return -EINVAL; + if (class_max > NF_CT_MAX_EXPECT_CLASSES) return -EOVERFLOW; expect_policy = kzalloc(sizeof(struct nf_conntrack_expect_policy) * - helper->expect_class_max, GFP_KERNEL); + class_max, GFP_KERNEL); if (expect_policy == NULL) return -ENOMEM; - for (i=0; iexpect_class_max; i++) { + for (i = 0; i < class_max; i++) { if (!tb[NFCTH_POLICY_SET+i]) goto err; @@ -191,6 +191,8 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, if (ret < 0) goto err; } + + helper->expect_class_max = class_max - 1; helper->expect_policy = expect_policy; return 0; err: @@ -377,10 +379,10 @@ nfnl_cthelper_dump_policy(struct sk_buff *skb, goto nla_put_failure; if (nla_put_be32(skb, NFCTH_POLICY_SET_NUM, - htonl(helper->expect_class_max))) + htonl(helper->expect_class_max + 1))) goto nla_put_failure; - for (i=0; iexpect_class_max; i++) { + for (i = 0; i < helper->expect_class_max + 1; i++) { nest_parms2 = nla_nest_start(skb, (NFCTH_POLICY_SET+i) | NLA_F_NESTED); if (nest_parms2 == NULL) diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 139e0867e56e..47d6656c9119 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -646,8 +646,8 @@ static void __exit cttimeout_exit(void) #ifdef CONFIG_NF_CONNTRACK_TIMEOUT RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL); RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL); + synchronize_rcu(); #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - rcu_barrier(); } module_init(cttimeout_init); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 246f29d365c0..2a5775f8a6ca 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2211,10 +2211,13 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, mutex_unlock(nlk->cb_mutex); + ret = 0; if (cb->start) - cb->start(cb); + ret = cb->start(cb); + + if (!ret) + ret = netlink_dump(sk); - ret = netlink_dump(sk); sock_put(sk); if (ret) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 35ba4b60d927..b17f9097c6fe 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1648,10 +1648,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) mutex_lock(&fanout_mutex); - err = -EINVAL; - if (!po->running) - goto out; - err = -EALREADY; if (po->fanout) goto out; @@ -1700,7 +1696,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) list_add(&match->list, &fanout_list); } err = -EINVAL; - if (match->type == type && + + spin_lock(&po->bind_lock); + if (po->running && + match->type == type && match->prot_hook.type == po->prot_hook.type && match->prot_hook.dev == po->prot_hook.dev) { err = -ENOSPC; @@ -1712,6 +1711,13 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) err = 0; } } + spin_unlock(&po->bind_lock); + + if (err && !atomic_read(&match->sk_ref)) { + list_del(&match->list); + kfree(match); + } + out: if (err && rollover) { kfree(rollover); @@ -2832,6 +2838,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) struct virtio_net_hdr vnet_hdr = { 0 }; int offset = 0; struct packet_sock *po = pkt_sk(sk); + bool has_vnet_hdr = false; int hlen, tlen, linear; int extra_len = 0; @@ -2875,6 +2882,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) err = packet_snd_vnet_parse(msg, &len, &vnet_hdr); if (err) goto out_unlock; + has_vnet_hdr = true; } if (unlikely(sock_flag(sk, SOCK_NOFCS))) { @@ -2935,7 +2943,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) packet_pick_tx_queue(dev, skb); - if (po->has_vnet_hdr) { + if (has_vnet_hdr) { err = packet_snd_vnet_gso(skb, &vnet_hdr); if (err) goto out_free; @@ -3063,13 +3071,15 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, int ret = 0; bool unlisted = false; - if (po->fanout) - return -EINVAL; - lock_sock(sk); spin_lock(&po->bind_lock); rcu_read_lock(); + if (po->fanout) { + ret = -EINVAL; + goto out_unlock; + } + if (name) { dev = dev_get_by_name_rcu(sock_net(sk), name); if (!dev) { @@ -3884,6 +3894,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, case PACKET_HDRLEN: if (len > sizeof(int)) len = sizeof(int); + if (len < sizeof(int)) + return -EINVAL; if (copy_from_user(&val, optval, len)) return -EFAULT; switch (val) { diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 5b2ab95afa07..169156cfd4c8 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -405,7 +405,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) ret = PTR_ERR(ic->i_send_cq); ic->i_send_cq = NULL; rdsdebug("ib_create_cq send failed: %d\n", ret); - goto out; + goto rds_ibdev_out; } cq_attr.cqe = ic->i_recv_ring.w_nr; @@ -416,19 +416,19 @@ static int rds_ib_setup_qp(struct rds_connection *conn) ret = PTR_ERR(ic->i_recv_cq); ic->i_recv_cq = NULL; rdsdebug("ib_create_cq recv failed: %d\n", ret); - goto out; + goto send_cq_out; } ret = ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP); if (ret) { rdsdebug("ib_req_notify_cq send failed: %d\n", ret); - goto out; + goto recv_cq_out; } ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); if (ret) { rdsdebug("ib_req_notify_cq recv failed: %d\n", ret); - goto out; + goto recv_cq_out; } /* XXX negotiate max send/recv with remote? */ @@ -453,7 +453,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr); if (ret) { rdsdebug("rdma_create_qp failed: %d\n", ret); - goto out; + goto recv_cq_out; } ic->i_send_hdrs = ib_dma_alloc_coherent(dev, @@ -463,7 +463,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) if (!ic->i_send_hdrs) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent send failed\n"); - goto out; + goto qp_out; } ic->i_recv_hdrs = ib_dma_alloc_coherent(dev, @@ -473,7 +473,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) if (!ic->i_recv_hdrs) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent recv failed\n"); - goto out; + goto send_hdrs_dma_out; } ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), @@ -481,7 +481,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) if (!ic->i_ack) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent ack failed\n"); - goto out; + goto recv_hdrs_dma_out; } ic->i_sends = vzalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work), @@ -489,7 +489,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) if (!ic->i_sends) { ret = -ENOMEM; rdsdebug("send allocation failed\n"); - goto out; + goto ack_dma_out; } ic->i_recvs = vzalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work), @@ -497,7 +497,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) if (!ic->i_recvs) { ret = -ENOMEM; rdsdebug("recv allocation failed\n"); - goto out; + goto sends_out; } rds_ib_recv_init_ack(ic); @@ -505,8 +505,33 @@ static int rds_ib_setup_qp(struct rds_connection *conn) rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd, ic->i_send_cq, ic->i_recv_cq); -out: + return ret; + +sends_out: + vfree(ic->i_sends); +ack_dma_out: + ib_dma_free_coherent(dev, sizeof(struct rds_header), + ic->i_ack, ic->i_ack_dma); +recv_hdrs_dma_out: + ib_dma_free_coherent(dev, ic->i_recv_ring.w_nr * + sizeof(struct rds_header), + ic->i_recv_hdrs, ic->i_recv_hdrs_dma); +send_hdrs_dma_out: + ib_dma_free_coherent(dev, ic->i_send_ring.w_nr * + sizeof(struct rds_header), + ic->i_send_hdrs, ic->i_send_hdrs_dma); +qp_out: + rdma_destroy_qp(ic->i_cm_id); +recv_cq_out: + if (!ib_destroy_cq(ic->i_recv_cq)) + ic->i_recv_cq = NULL; +send_cq_out: + if (!ib_destroy_cq(ic->i_send_cq)) + ic->i_send_cq = NULL; +rds_ibdev_out: + rds_ib_remove_conn(rds_ibdev, conn); rds_ib_dev_put(rds_ibdev); + return ret; } diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 84d90c97332f..191098173018 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -69,16 +69,6 @@ static void rds_ib_send_complete(struct rds_message *rm, complete(rm, notify_status); } -static void rds_ib_send_unmap_data(struct rds_ib_connection *ic, - struct rm_data_op *op, - int wc_status) -{ - if (op->op_nents) - ib_dma_unmap_sg(ic->i_cm_id->device, - op->op_sg, op->op_nents, - DMA_TO_DEVICE); -} - static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic, struct rm_rdma_op *op, int wc_status) @@ -139,6 +129,21 @@ static void rds_ib_send_unmap_atomic(struct rds_ib_connection *ic, rds_ib_stats_inc(s_ib_atomic_fadd); } +static void rds_ib_send_unmap_data(struct rds_ib_connection *ic, + struct rm_data_op *op, + int wc_status) +{ + struct rds_message *rm = container_of(op, struct rds_message, data); + + if (op->op_nents) + ib_dma_unmap_sg(ic->i_cm_id->device, + op->op_sg, op->op_nents, + DMA_TO_DEVICE); + + if (rm->rdma.op_active && rm->data.op_notify) + rds_ib_send_unmap_rdma(ic, &rm->rdma, wc_status); +} + /* * Unmap the resources associated with a struct send_work. * diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 4c93badeabf2..8d3a851a3476 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -626,6 +626,16 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, } op->op_notifier->n_user_token = args->user_token; op->op_notifier->n_status = RDS_RDMA_SUCCESS; + + /* Enable rmda notification on data operation for composite + * rds messages and make sure notification is enabled only + * for the data operation which follows it so that application + * gets notified only after full message gets delivered. + */ + if (rm->data.op_sg) { + rm->rdma.op_notify = 0; + rm->data.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); + } } /* The cookie contains the R_Key of the remote memory region, and diff --git a/net/rds/rds.h b/net/rds/rds.h index 67ba67c058b1..f107a968ddff 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -414,6 +414,7 @@ struct rds_message { } rdma; struct rm_data_op { unsigned int op_active:1; + unsigned int op_notify:1; unsigned int op_nents; unsigned int op_count; unsigned int op_dmasg; diff --git a/net/rds/send.c b/net/rds/send.c index 896626b9a0ef..f28651b6ae83 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -475,12 +475,14 @@ void rds_rdma_send_complete(struct rds_message *rm, int status) struct rm_rdma_op *ro; struct rds_notifier *notifier; unsigned long flags; + unsigned int notify = 0; spin_lock_irqsave(&rm->m_rs_lock, flags); + notify = rm->rdma.op_notify | rm->data.op_notify; ro = &rm->rdma; if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) && - ro->op_active && ro->op_notify && ro->op_notifier) { + ro->op_active && notify && ro->op_notifier) { notifier = ro->op_notifier; rs = rm->m_rs; sock_hold(rds_rs_to_sk(rs)); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index c651cfce9be6..f3117324146a 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -141,7 +141,7 @@ static int tcf_del_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb, hlist_for_each_entry_safe(p, n, head, tcfa_head) { ret = __tcf_hash_release(p, false, true); if (ret == ACT_P_DELETED) { - module_put(p->ops->owner); + module_put(ops->owner); n_i++; } else if (ret < 0) goto nla_put_failure; @@ -450,13 +450,15 @@ EXPORT_SYMBOL(tcf_action_exec); int tcf_action_destroy(struct list_head *actions, int bind) { + const struct tc_action_ops *ops; struct tc_action *a, *tmp; int ret = 0; list_for_each_entry_safe(a, tmp, actions, list) { + ops = a->ops; ret = __tcf_hash_release(a, bind, true); if (ret == ACT_P_DELETED) - module_put(a->ops->owner); + module_put(ops->owner); else if (ret < 0) return ret; } diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index b12bc2abea93..e75fb65037d7 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -32,6 +32,7 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp, if (tc_skip_sw(head->flags)) return -1; + *res = head->res; return tcf_exts_exec(skb, &head->exts, res); } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 6cfb6e9038c2..9016c8baf2aa 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -681,6 +681,7 @@ void qdisc_reset(struct Qdisc *qdisc) qdisc->gso_skb = NULL; } qdisc->q.qlen = 0; + qdisc->qstats.backlog = 0; } EXPORT_SYMBOL(qdisc_reset); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 75f290bddca1..272c34551979 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -702,6 +702,65 @@ found_pool: return task; } +/* create new threads */ +static int +svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) +{ + struct svc_rqst *rqstp; + struct task_struct *task; + struct svc_pool *chosen_pool; + unsigned int state = serv->sv_nrthreads-1; + int node; + + do { + nrservs--; + chosen_pool = choose_pool(serv, pool, &state); + + node = svc_pool_map_get_node(chosen_pool->sp_id); + rqstp = svc_prepare_thread(serv, chosen_pool, node); + if (IS_ERR(rqstp)) + return PTR_ERR(rqstp); + + __module_get(serv->sv_ops->svo_module); + task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp, + node, "%s", serv->sv_name); + if (IS_ERR(task)) { + module_put(serv->sv_ops->svo_module); + svc_exit_thread(rqstp); + return PTR_ERR(task); + } + + rqstp->rq_task = task; + if (serv->sv_nrpools > 1) + svc_pool_map_set_cpumask(task, chosen_pool->sp_id); + + svc_sock_update_bufs(serv); + wake_up_process(task); + } while (nrservs > 0); + + return 0; +} + + +/* destroy old threads */ +static int +svc_signal_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) +{ + struct task_struct *task; + unsigned int state = serv->sv_nrthreads-1; + + /* destroy old threads */ + do { + task = choose_victim(serv, pool, &state); + if (task == NULL) + break; + send_sig(SIGINT, task, 1); + nrservs++; + } while (nrservs < 0); + + return 0; +} + /* * Create or destroy enough new threads to make the number * of threads the given number. If `pool' is non-NULL, applies @@ -719,13 +778,6 @@ found_pool: int svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { - struct svc_rqst *rqstp; - struct task_struct *task; - struct svc_pool *chosen_pool; - int error = 0; - unsigned int state = serv->sv_nrthreads-1; - int node; - if (pool == NULL) { /* The -1 assumes caller has done a svc_get() */ nrservs -= (serv->sv_nrthreads-1); @@ -735,46 +787,52 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) spin_unlock_bh(&pool->sp_lock); } - /* create new threads */ - while (nrservs > 0) { - nrservs--; - chosen_pool = choose_pool(serv, pool, &state); - - node = svc_pool_map_get_node(chosen_pool->sp_id); - rqstp = svc_prepare_thread(serv, chosen_pool, node); - if (IS_ERR(rqstp)) { - error = PTR_ERR(rqstp); - break; - } - - __module_get(serv->sv_ops->svo_module); - task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp, - node, "%s", serv->sv_name); - if (IS_ERR(task)) { - error = PTR_ERR(task); - module_put(serv->sv_ops->svo_module); - svc_exit_thread(rqstp); - break; - } - - rqstp->rq_task = task; - if (serv->sv_nrpools > 1) - svc_pool_map_set_cpumask(task, chosen_pool->sp_id); - - svc_sock_update_bufs(serv); - wake_up_process(task); - } - /* destroy old threads */ - while (nrservs < 0 && - (task = choose_victim(serv, pool, &state)) != NULL) { - send_sig(SIGINT, task, 1); - nrservs++; - } - - return error; + if (nrservs > 0) + return svc_start_kthreads(serv, pool, nrservs); + if (nrservs < 0) + return svc_signal_kthreads(serv, pool, nrservs); + return 0; } EXPORT_SYMBOL_GPL(svc_set_num_threads); +/* destroy old threads */ +static int +svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) +{ + struct task_struct *task; + unsigned int state = serv->sv_nrthreads-1; + + /* destroy old threads */ + do { + task = choose_victim(serv, pool, &state); + if (task == NULL) + break; + kthread_stop(task); + nrservs++; + } while (nrservs < 0); + return 0; +} + +int +svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrservs) +{ + if (pool == NULL) { + /* The -1 assumes caller has done a svc_get() */ + nrservs -= (serv->sv_nrthreads-1); + } else { + spin_lock_bh(&pool->sp_lock); + nrservs -= pool->sp_nrthreads; + spin_unlock_bh(&pool->sp_lock); + } + + if (nrservs > 0) + return svc_start_kthreads(serv, pool, nrservs); + if (nrservs < 0) + return svc_stop_kthreads(serv, pool, nrservs); + return 0; +} +EXPORT_SYMBOL_GPL(svc_set_num_threads_sync); + /* * Called from a server thread as it's exiting. Caller must hold the "service * mutex" for the service. diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 56ea0adcd285..912f1fb97c06 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -547,7 +547,7 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) return false; if (msg_errcode(msg)) return false; - *err = -TIPC_ERR_NO_NAME; + *err = TIPC_ERR_NO_NAME; if (skb_linearize(skb)) return false; msg = buf_msg(skb); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e9e9bc5c8773..c626f679e1c8 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -541,6 +541,14 @@ nl80211_nan_srf_policy[NL80211_NAN_SRF_ATTR_MAX + 1] = { [NL80211_NAN_SRF_MAC_ADDRS] = { .type = NLA_NESTED }, }; +/* policy for packet pattern attributes */ +static const struct nla_policy +nl80211_packet_pattern_policy[MAX_NL80211_PKTPAT + 1] = { + [NL80211_PKTPAT_MASK] = { .type = NLA_BINARY, }, + [NL80211_PKTPAT_PATTERN] = { .type = NLA_BINARY, }, + [NL80211_PKTPAT_OFFSET] = { .type = NLA_U32 }, +}; + static int nl80211_prepare_wdev_dump(struct sk_buff *skb, struct netlink_callback *cb, struct cfg80211_registered_device **rdev, @@ -10009,7 +10017,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) u8 *mask_pat; nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat), - nla_len(pat), NULL); + nla_len(pat), nl80211_packet_pattern_policy); err = -EINVAL; if (!pat_tb[NL80211_PKTPAT_MASK] || !pat_tb[NL80211_PKTPAT_PATTERN]) @@ -10259,7 +10267,7 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, u8 *mask_pat; nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat), - nla_len(pat), NULL); + nla_len(pat), nl80211_packet_pattern_policy); if (!pat_tb[NL80211_PKTPAT_MASK] || !pat_tb[NL80211_PKTPAT_PATTERN]) return -EINVAL; @@ -10385,6 +10393,9 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info) if (err) return err; + if (!tb[NL80211_REKEY_DATA_REPLAY_CTR] || !tb[NL80211_REKEY_DATA_KEK] || + !tb[NL80211_REKEY_DATA_KCK]) + return -EINVAL; if (nla_len(tb[NL80211_REKEY_DATA_REPLAY_CTR]) != NL80211_REPLAY_CTR_LEN) return -ERANGE; if (nla_len(tb[NL80211_REKEY_DATA_KEK]) != NL80211_KEK_LEN) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 637387bbaaea..d864a6d9e517 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -66,6 +66,9 @@ static int xfrm_output_one(struct sk_buff *skb, int err) goto error_nolock; } + if (x->props.output_mark) + skb->mark = x->props.output_mark; + err = x->outer_mode->output(x, skb); if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8ce5711ea21b..77fbfbdeffc8 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -125,7 +125,7 @@ static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr, - int family) + int family, u32 mark) { struct xfrm_policy_afinfo *afinfo; struct dst_entry *dst; @@ -134,7 +134,7 @@ static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, if (unlikely(afinfo == NULL)) return ERR_PTR(-EAFNOSUPPORT); - dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr); + dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr, mark); xfrm_policy_put_afinfo(afinfo); @@ -145,7 +145,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, int oif, xfrm_address_t *prev_saddr, xfrm_address_t *prev_daddr, - int family) + int family, u32 mark) { struct net *net = xs_net(x); xfrm_address_t *saddr = &x->props.saddr; @@ -161,7 +161,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, daddr = x->coaddr; } - dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family); + dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family, mark); if (!IS_ERR(dst)) { if (prev_saddr != saddr) @@ -1427,14 +1427,14 @@ int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) static int xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local, - xfrm_address_t *remote, unsigned short family) + xfrm_address_t *remote, unsigned short family, u32 mark) { int err; struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); if (unlikely(afinfo == NULL)) return -EINVAL; - err = afinfo->get_saddr(net, oif, local, remote); + err = afinfo->get_saddr(net, oif, local, remote, mark); xfrm_policy_put_afinfo(afinfo); return err; } @@ -1465,7 +1465,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, if (xfrm_addr_any(local, tmpl->encap_family)) { error = xfrm_get_saddr(net, fl->flowi_oif, &tmp, remote, - tmpl->encap_family); + tmpl->encap_family, 0); if (error) goto fail; local = &tmp; @@ -1744,7 +1744,8 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { family = xfrm[i]->props.family; dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif, - &saddr, &daddr, family); + &saddr, &daddr, family, + xfrm[i]->props.output_mark); err = PTR_ERR(dst); if (IS_ERR(dst)) goto put_states; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b2bba35e90b6..ed4f571428a5 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -584,6 +584,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, xfrm_mark_get(attrs, &x->mark); + if (attrs[XFRMA_OUTPUT_MARK]) + x->props.output_mark = nla_get_u32(attrs[XFRMA_OUTPUT_MARK]); + err = __xfrm_init_state(x, false); if (err) goto error; @@ -871,6 +874,11 @@ static int copy_to_user_state_extra(struct xfrm_state *x, goto out; if (x->security) ret = copy_sec_ctx(x->security, skb); + if (x->props.output_mark) { + ret = nla_put_u32(skb, XFRMA_OUTPUT_MARK, x->props.output_mark); + if (ret) + goto out; + } out: return ret; } @@ -2419,6 +2427,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 }, [XFRMA_PROTO] = { .type = NLA_U8 }, [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) }, + [XFRMA_OUTPUT_MARK] = { .len = NLA_U32 }, }; static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = { @@ -2635,6 +2644,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x) l += nla_total_size(sizeof(*x->coaddr)); if (x->props.extra_flags) l += nla_total_size(sizeof(x->props.extra_flags)); + if (x->props.output_mark) + l += nla_total_size(sizeof(x->props.output_mark)); /* Must count x->lastused as it may become non-zero behind our back. */ l += nla_total_size_64bit(sizeof(u64)); diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 179219845dfc..140a0fa24f84 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -108,6 +108,11 @@ as-option = $(call try-run,\ as-instr = $(call try-run,\ printf "%b\n" "$(1)" | $(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3)) +# __cc-option +# Usage: MY_CFLAGS += $(call __cc-option,$(CC),$(MY_CFLAGS),-march=winchip-c6,-march=i586) +__cc-option = $(call try-run,\ + $(1) -Werror $(2) $(3) -c -x c /dev/null -o "$$TMP",$(3),$(4)) + # Do not attempt to build with gcc plugins during cc-option tests. # (And this uses delayed resolution so the flags will be up to date.) CC_OPTION_CFLAGS = $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS)) @@ -115,13 +120,18 @@ CC_OPTION_CFLAGS = $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS)) # cc-option # Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586) -cc-option = $(call try-run,\ - $(CC) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2)) +cc-option = $(call __cc-option, $(CC),\ + $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS),$(1),$(2)) + +# hostcc-option +# Usage: cflags-y += $(call hostcc-option,-march=winchip-c6,-march=i586) +hostcc-option = $(call __cc-option, $(HOSTCC),\ + $(HOSTCFLAGS) $(HOST_EXTRACFLAGS),$(1),$(2)) # cc-option-yn # Usage: flag := $(call cc-option-yn,-march=winchip-c6) cc-option-yn = $(call try-run,\ - $(CC) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n) + $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n) # cc-option-align # Prefix align with either -falign or -malign @@ -131,7 +141,7 @@ cc-option-align = $(subst -functions=0,,\ # cc-disable-warning # Usage: cflags-y += $(call cc-disable-warning,unused-but-set-variable) cc-disable-warning = $(call try-run,\ - $(CC) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) + $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) # cc-name # Expands to either gcc or clang diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 7675d11ee65e..3e3efd37ba47 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -176,6 +176,14 @@ cmd_cc_symtypes_c = \ $(obj)/%.symtypes : $(src)/%.c FORCE $(call cmd,cc_symtypes_c) +# LLVM assembly +# Generate .ll files from .c +quiet_cmd_cc_ll_c = CC $(quiet_modtag) $@ + cmd_cc_ll_c = $(CC) $(c_flags) -emit-llvm -S -o $@ $< + +$(obj)/%.ll: $(src)/%.c FORCE + $(call if_changed_dep,cc_ll_c) + # C (.c) files # The C file is compiled and updated dependency information is generated. # (See cmd_cc_o_c + relevant part of rule_cc_o_c) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 7c321a603b07..fb3522fd8702 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -64,7 +64,6 @@ ifeq ($(cc-name),clang) KBUILD_CFLAGS += $(call cc-disable-warning, initializer-overrides) KBUILD_CFLAGS += $(call cc-disable-warning, unused-value) KBUILD_CFLAGS += $(call cc-disable-warning, format) -KBUILD_CFLAGS += $(call cc-disable-warning, unknown-warning-option) KBUILD_CFLAGS += $(call cc-disable-warning, sign-compare) KBUILD_CFLAGS += $(call cc-disable-warning, format-zero-length) KBUILD_CFLAGS += $(call cc-disable-warning, uninitialized) diff --git a/scripts/Makefile.host b/scripts/Makefile.host index 45b5b1aaedbd..9cfd5c84d76f 100644 --- a/scripts/Makefile.host +++ b/scripts/Makefile.host @@ -20,12 +20,6 @@ # Will compile qconf as a C++ program, and menu as a C program. # They are linked as C++ code to the executable qconf -# hostcc-option -# Usage: cflags-y += $(call hostcc-option,-march=winchip-c6,-march=i586) - -hostcc-option = $(call try-run,\ - $(HOSTCC) $(HOSTCFLAGS) $(HOST_EXTRACFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2)) - __hostprogs := $(sort $(hostprogs-y) $(hostprogs-m)) host-cshlib := $(sort $(hostlibs-y) $(hostlibs-m)) host-cxxshlib := $(sort $(hostcxxlibs-y) $(hostcxxlibs-m)) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index d3d3320722cb..9e447b8497c6 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -414,3 +414,34 @@ quiet_cmd_xzmisc = XZMISC $@ cmd_xzmisc = (cat $(filter-out FORCE,$^) | \ xz --check=crc32 --lzma2=dict=1MiB) > $@ || \ (rm -f $@ ; false) + +# ASM offsets +# --------------------------------------------------------------------------- + +# Default sed regexp - multiline due to syntax constraints +# +# Use [:space:] because LLVM's integrated assembler inserts around +# the .ascii directive whereas GCC keeps the as-is. +define sed-offsets + 's:^[[:space:]]*\.ascii[[:space:]]*"\(.*\)".*:\1:; \ + /^->/{s:->#\(.*\):/* \1 */:; \ + s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \ + s:->::; p;}' +endef + +# Use filechk to avoid rebuilds when a header changes, but the resulting file +# does not +define filechk_offsets + (set -e; \ + echo "#ifndef $2"; \ + echo "#define $2"; \ + echo "/*"; \ + echo " * DO NOT MODIFY."; \ + echo " *"; \ + echo " * This file was generated by Kbuild"; \ + echo " */"; \ + echo ""; \ + sed -ne $(sed-offsets); \ + echo ""; \ + echo "#endif" ) +endef diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile index 19d9bcadc0cc..b497d9764dcf 100644 --- a/scripts/mod/Makefile +++ b/scripts/mod/Makefile @@ -7,32 +7,8 @@ modpost-objs := modpost.o file2alias.o sumversion.o devicetable-offsets-file := devicetable-offsets.h -define sed-y - "/^->/{s:->#\(.*\):/* \1 */:; \ - s:^->\([^ ]*\) [\$$#]*\([-0-9]*\) \(.*\):#define \1 \2 /* \3 */:; \ - s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \ - s:->::; p;}" -endef - -quiet_cmd_offsets = GEN $@ -define cmd_offsets - (set -e; \ - echo "#ifndef __DEVICETABLE_OFFSETS_H__"; \ - echo "#define __DEVICETABLE_OFFSETS_H__"; \ - echo "/*"; \ - echo " * DO NOT MODIFY."; \ - echo " *"; \ - echo " * This file was generated by Kbuild"; \ - echo " *"; \ - echo " */"; \ - echo ""; \ - sed -ne $(sed-y) $<; \ - echo ""; \ - echo "#endif" ) > $@ -endef - -$(obj)/$(devicetable-offsets-file): $(obj)/devicetable-offsets.s - $(call if_changed,offsets) +$(obj)/$(devicetable-offsets-file): $(obj)/devicetable-offsets.s FORCE + $(call filechk,offsets,__DEVICETABLE_OFFSETS_H__) targets += $(devicetable-offsets-file) devicetable-offsets.s diff --git a/security/keys/Kconfig b/security/keys/Kconfig index d942c7c2bc0a..e0a39781b10f 100644 --- a/security/keys/Kconfig +++ b/security/keys/Kconfig @@ -41,10 +41,8 @@ config BIG_KEYS bool "Large payload keys" depends on KEYS depends on TMPFS - depends on (CRYPTO_ANSI_CPRNG = y || CRYPTO_DRBG = y) select CRYPTO_AES - select CRYPTO_ECB - select CRYPTO_RNG + select CRYPTO_GCM help This option provides support for holding large keys within the kernel (for example Kerberos ticket caches). The data may be stored out to diff --git a/security/keys/big_key.c b/security/keys/big_key.c index 835c1ab30d01..e6288173d99d 100644 --- a/security/keys/big_key.c +++ b/security/keys/big_key.c @@ -1,5 +1,6 @@ /* Large capacity key type * + * Copyright (C) 2017 Jason A. Donenfeld . All Rights Reserved. * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * @@ -16,10 +17,10 @@ #include #include #include +#include #include #include -#include -#include +#include /* * Layout of key payload words. @@ -49,7 +50,12 @@ enum big_key_op { /* * Key size for big_key data encryption */ -#define ENC_KEY_SIZE 16 +#define ENC_KEY_SIZE 32 + +/* + * Authentication tag length + */ +#define ENC_AUTHTAG_SIZE 16 /* * big_key defined keys take an arbitrary string as the description and an @@ -64,57 +70,62 @@ struct key_type key_type_big_key = { .destroy = big_key_destroy, .describe = big_key_describe, .read = big_key_read, + /* no ->update(); don't add it without changing big_key_crypt() nonce */ }; /* - * Crypto names for big_key data encryption + * Crypto names for big_key data authenticated encryption */ -static const char big_key_rng_name[] = "stdrng"; -static const char big_key_alg_name[] = "ecb(aes)"; +static const char big_key_alg_name[] = "gcm(aes)"; /* - * Crypto algorithms for big_key data encryption + * Crypto algorithms for big_key data authenticated encryption */ -static struct crypto_rng *big_key_rng; -static struct crypto_skcipher *big_key_skcipher; +static struct crypto_aead *big_key_aead; /* - * Generate random key to encrypt big_key data + * Since changing the key affects the entire object, we need a mutex. */ -static inline int big_key_gen_enckey(u8 *key) -{ - return crypto_rng_get_bytes(big_key_rng, key, ENC_KEY_SIZE); -} +static DEFINE_MUTEX(big_key_aead_lock); /* * Encrypt/decrypt big_key data */ static int big_key_crypt(enum big_key_op op, u8 *data, size_t datalen, u8 *key) { - int ret = -EINVAL; + int ret; struct scatterlist sgio; - SKCIPHER_REQUEST_ON_STACK(req, big_key_skcipher); + struct aead_request *aead_req; + /* We always use a zero nonce. The reason we can get away with this is + * because we're using a different randomly generated key for every + * different encryption. Notably, too, key_type_big_key doesn't define + * an .update function, so there's no chance we'll wind up reusing the + * key to encrypt updated data. Simply put: one key, one encryption. + */ + u8 zero_nonce[crypto_aead_ivsize(big_key_aead)]; - if (crypto_skcipher_setkey(big_key_skcipher, key, ENC_KEY_SIZE)) { + aead_req = aead_request_alloc(big_key_aead, GFP_KERNEL); + if (!aead_req) + return -ENOMEM; + + memset(zero_nonce, 0, sizeof(zero_nonce)); + sg_init_one(&sgio, data, datalen + (op == BIG_KEY_ENC ? ENC_AUTHTAG_SIZE : 0)); + aead_request_set_crypt(aead_req, &sgio, &sgio, datalen, zero_nonce); + aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); + aead_request_set_ad(aead_req, 0); + + mutex_lock(&big_key_aead_lock); + if (crypto_aead_setkey(big_key_aead, key, ENC_KEY_SIZE)) { ret = -EAGAIN; goto error; } - - skcipher_request_set_tfm(req, big_key_skcipher); - skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, - NULL, NULL); - - sg_init_one(&sgio, data, datalen); - skcipher_request_set_crypt(req, &sgio, &sgio, datalen, NULL); - if (op == BIG_KEY_ENC) - ret = crypto_skcipher_encrypt(req); + ret = crypto_aead_encrypt(aead_req); else - ret = crypto_skcipher_decrypt(req); - - skcipher_request_zero(req); - + ret = crypto_aead_decrypt(aead_req); error: + mutex_unlock(&big_key_aead_lock); + aead_request_free(aead_req); return ret; } @@ -146,15 +157,13 @@ int big_key_preparse(struct key_preparsed_payload *prep) * * File content is stored encrypted with randomly generated key. */ - size_t enclen = ALIGN(datalen, crypto_skcipher_blocksize(big_key_skcipher)); + size_t enclen = datalen + ENC_AUTHTAG_SIZE; - /* prepare aligned data to encrypt */ data = kmalloc(enclen, GFP_KERNEL); if (!data) return -ENOMEM; memcpy(data, prep->data, datalen); - memset(data + datalen, 0x00, enclen - datalen); /* generate random key */ enckey = kmalloc(ENC_KEY_SIZE, GFP_KERNEL); @@ -162,13 +171,10 @@ int big_key_preparse(struct key_preparsed_payload *prep) ret = -ENOMEM; goto error; } - - ret = big_key_gen_enckey(enckey); - if (ret) - goto err_enckey; + get_random_bytes(enckey, ENC_KEY_SIZE); /* encrypt aligned data */ - ret = big_key_crypt(BIG_KEY_ENC, data, enclen, enckey); + ret = big_key_crypt(BIG_KEY_ENC, data, datalen, enckey); if (ret) goto err_enckey; @@ -194,7 +200,7 @@ int big_key_preparse(struct key_preparsed_payload *prep) *path = file->f_path; path_get(path); fput(file); - kfree(data); + kzfree(data); } else { /* Just store the data in a buffer */ void *data = kmalloc(datalen, GFP_KERNEL); @@ -210,9 +216,9 @@ int big_key_preparse(struct key_preparsed_payload *prep) err_fput: fput(file); err_enckey: - kfree(enckey); + kzfree(enckey); error: - kfree(data); + kzfree(data); return ret; } @@ -226,7 +232,7 @@ void big_key_free_preparse(struct key_preparsed_payload *prep) path_put(path); } - kfree(prep->payload.data[big_key_data]); + kzfree(prep->payload.data[big_key_data]); } /* @@ -239,7 +245,7 @@ void big_key_revoke(struct key *key) /* clear the quota */ key_payload_reserve(key, 0); - if (key_is_instantiated(key) && + if (key_is_positive(key) && (size_t)key->payload.data[big_key_len] > BIG_KEY_FILE_THRESHOLD) vfs_truncate(path, 0); } @@ -258,7 +264,7 @@ void big_key_destroy(struct key *key) path->mnt = NULL; path->dentry = NULL; } - kfree(key->payload.data[big_key_data]); + kzfree(key->payload.data[big_key_data]); key->payload.data[big_key_data] = NULL; } @@ -271,7 +277,7 @@ void big_key_describe(const struct key *key, struct seq_file *m) seq_puts(m, key->description); - if (key_is_instantiated(key)) + if (key_is_positive(key)) seq_printf(m, ": %zu [%s]", datalen, datalen > BIG_KEY_FILE_THRESHOLD ? "file" : "buff"); @@ -294,7 +300,7 @@ long big_key_read(const struct key *key, char __user *buffer, size_t buflen) struct file *file; u8 *data; u8 *enckey = (u8 *)key->payload.data[big_key_data]; - size_t enclen = ALIGN(datalen, crypto_skcipher_blocksize(big_key_skcipher)); + size_t enclen = datalen + ENC_AUTHTAG_SIZE; data = kmalloc(enclen, GFP_KERNEL); if (!data) @@ -326,7 +332,7 @@ long big_key_read(const struct key *key, char __user *buffer, size_t buflen) err_fput: fput(file); error: - kfree(data); + kzfree(data); } else { ret = datalen; if (copy_to_user(buffer, key->payload.data[big_key_data], @@ -342,47 +348,31 @@ error: */ static int __init big_key_init(void) { - struct crypto_skcipher *cipher; - struct crypto_rng *rng; int ret; - rng = crypto_alloc_rng(big_key_rng_name, 0, 0); - if (IS_ERR(rng)) { - pr_err("Can't alloc rng: %ld\n", PTR_ERR(rng)); - return PTR_ERR(rng); - } - - big_key_rng = rng; - - /* seed RNG */ - ret = crypto_rng_reset(rng, NULL, crypto_rng_seedsize(rng)); - if (ret) { - pr_err("Can't reset rng: %d\n", ret); - goto error_rng; - } - /* init block cipher */ - cipher = crypto_alloc_skcipher(big_key_alg_name, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(cipher)) { - ret = PTR_ERR(cipher); + big_key_aead = crypto_alloc_aead(big_key_alg_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(big_key_aead)) { + ret = PTR_ERR(big_key_aead); pr_err("Can't alloc crypto: %d\n", ret); - goto error_rng; + return ret; + } + ret = crypto_aead_setauthsize(big_key_aead, ENC_AUTHTAG_SIZE); + if (ret < 0) { + pr_err("Can't set crypto auth tag len: %d\n", ret); + goto free_aead; } - - big_key_skcipher = cipher; ret = register_key_type(&key_type_big_key); if (ret < 0) { pr_err("Can't register type: %d\n", ret); - goto error_cipher; + goto free_aead; } return 0; -error_cipher: - crypto_free_skcipher(big_key_skcipher); -error_rng: - crypto_free_rng(big_key_rng); +free_aead: + crypto_free_aead(big_key_aead); return ret; } diff --git a/security/keys/dh.c b/security/keys/dh.c index 531ed2ec132f..893af4c45038 100644 --- a/security/keys/dh.c +++ b/security/keys/dh.c @@ -55,7 +55,7 @@ static ssize_t mpi_from_key(key_serial_t keyid, size_t maxlen, MPI *mpi) if (status == 0) { const struct user_key_payload *payload; - payload = user_key_payload(key); + payload = user_key_payload_locked(key); if (maxlen == 0) { *mpi = NULL; diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index 8d9330aba77e..ae70e828da2e 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -314,7 +314,14 @@ static struct key *request_user_key(const char *master_desc, const u8 **master_k goto error; down_read(&ukey->sem); - upayload = user_key_payload(ukey); + upayload = user_key_payload_locked(ukey); + if (!upayload) { + /* key was revoked before we acquired its semaphore */ + up_read(&ukey->sem); + key_put(ukey); + ukey = ERR_PTR(-EKEYREVOKED); + goto error; + } *master_key = upayload->data; *master_keylen = upayload->datalen; error: @@ -867,7 +874,7 @@ static int encrypted_update(struct key *key, struct key_preparsed_payload *prep) size_t datalen = prep->datalen; int ret = 0; - if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) + if (key_is_negative(key)) return -ENOKEY; if (datalen <= 0 || datalen > 32767 || !prep->data) return -EINVAL; @@ -927,7 +934,7 @@ static long encrypted_read(const struct key *key, char __user *buffer, size_t asciiblob_len; int ret; - epayload = rcu_dereference_key(key); + epayload = dereference_key_locked(key); /* returns the hex encoded iv, encrypted-data, and hmac as ascii */ asciiblob_len = epayload->datablob_len + ivsize + 1 diff --git a/security/keys/gc.c b/security/keys/gc.c index 9cb4fe4478a1..1659094d684d 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -129,15 +129,15 @@ static noinline void key_gc_unused_keys(struct list_head *keys) while (!list_empty(keys)) { struct key *key = list_entry(keys->next, struct key, graveyard_link); + short state = key->state; + list_del(&key->graveyard_link); kdebug("- %u", key->serial); key_check(key); /* Throw away the key data if the key is instantiated */ - if (test_bit(KEY_FLAG_INSTANTIATED, &key->flags) && - !test_bit(KEY_FLAG_NEGATIVE, &key->flags) && - key->type->destroy) + if (state == KEY_IS_POSITIVE && key->type->destroy) key->type->destroy(key); security_key_free(key); @@ -151,7 +151,7 @@ static noinline void key_gc_unused_keys(struct list_head *keys) } atomic_dec(&key->user->nkeys); - if (test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) + if (state != KEY_IS_UNINSTANTIATED) atomic_dec(&key->user->nikeys); key_user_put(key->user); diff --git a/security/keys/internal.h b/security/keys/internal.h index a705a7d92ad7..fb0c65049c19 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -137,7 +137,7 @@ extern key_ref_t keyring_search_aux(key_ref_t keyring_ref, extern key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx); extern key_ref_t search_process_keyrings(struct keyring_search_context *ctx); -extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check); +extern struct key *find_keyring_by_name(const char *name, bool uid_keyring); extern int install_user_keyrings(void); extern int install_thread_keyring_to_cred(struct cred *); diff --git a/security/keys/key.c b/security/keys/key.c index 2f4ce35ae2aa..7dc59069e8c7 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -301,6 +301,8 @@ struct key *key_alloc(struct key_type *type, const char *desc, key->flags |= 1 << KEY_FLAG_IN_QUOTA; if (flags & KEY_ALLOC_BUILT_IN) key->flags |= 1 << KEY_FLAG_BUILTIN; + if (flags & KEY_ALLOC_UID_KEYRING) + key->flags |= 1 << KEY_FLAG_UID_KEYRING; #ifdef KEY_DEBUGGING key->magic = KEY_DEBUG_MAGIC; @@ -398,6 +400,18 @@ int key_payload_reserve(struct key *key, size_t datalen) } EXPORT_SYMBOL(key_payload_reserve); +/* + * Change the key state to being instantiated. + */ +static void mark_key_instantiated(struct key *key, int reject_error) +{ + /* Commit the payload before setting the state; barrier versus + * key_read_state(). + */ + smp_store_release(&key->state, + (reject_error < 0) ? reject_error : KEY_IS_POSITIVE); +} + /* * Instantiate a key and link it into the target keyring atomically. Must be * called with the target keyring's semaphore writelocked. The target key's @@ -421,14 +435,14 @@ static int __key_instantiate_and_link(struct key *key, mutex_lock(&key_construction_mutex); /* can't instantiate twice */ - if (!test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) { + if (key->state == KEY_IS_UNINSTANTIATED) { /* instantiate the key */ ret = key->type->instantiate(key, prep); if (ret == 0) { /* mark the key as being instantiated */ atomic_inc(&key->user->nikeys); - set_bit(KEY_FLAG_INSTANTIATED, &key->flags); + mark_key_instantiated(key, 0); if (test_and_clear_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags)) awaken = 1; @@ -570,13 +584,10 @@ int key_reject_and_link(struct key *key, mutex_lock(&key_construction_mutex); /* can't instantiate twice */ - if (!test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) { + if (key->state == KEY_IS_UNINSTANTIATED) { /* mark the key as being negatively instantiated */ atomic_inc(&key->user->nikeys); - key->reject_error = -error; - smp_wmb(); - set_bit(KEY_FLAG_NEGATIVE, &key->flags); - set_bit(KEY_FLAG_INSTANTIATED, &key->flags); + mark_key_instantiated(key, -error); now = current_kernel_time(); key->expiry = now.tv_sec + timeout; key_schedule_gc(key->expiry + key_gc_delay); @@ -748,8 +759,8 @@ static inline key_ref_t __key_update(key_ref_t key_ref, ret = key->type->update(key, prep); if (ret == 0) - /* updating a negative key instantiates it */ - clear_bit(KEY_FLAG_NEGATIVE, &key->flags); + /* Updating a negative key positively instantiates it */ + mark_key_instantiated(key, 0); up_write(&key->sem); @@ -933,6 +944,16 @@ error: */ __key_link_end(keyring, &index_key, edit); + key = key_ref_to_ptr(key_ref); + if (test_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags)) { + ret = wait_for_key_construction(key, true); + if (ret < 0) { + key_ref_put(key_ref); + key_ref = ERR_PTR(ret); + goto error_free_prep; + } + } + key_ref = __key_update(key_ref, &prep); goto error_free_prep; } @@ -983,8 +1004,8 @@ int key_update(key_ref_t key_ref, const void *payload, size_t plen) ret = key->type->update(key, &prep); if (ret == 0) - /* updating a negative key instantiates it */ - clear_bit(KEY_FLAG_NEGATIVE, &key->flags); + /* Updating a negative key positively instantiates it */ + mark_key_instantiated(key, 0); up_write(&key->sem); diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index ada12c3e3ac4..797edcf1d424 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -766,6 +766,10 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen) key = key_ref_to_ptr(key_ref); + ret = key_read_state(key); + if (ret < 0) + goto error2; /* Negatively instantiated */ + /* see if we can read it directly */ ret = key_permission(key_ref, KEY_NEED_READ); if (ret == 0) @@ -896,7 +900,7 @@ long keyctl_chown_key(key_serial_t id, uid_t user, gid_t group) atomic_dec(&key->user->nkeys); atomic_inc(&newowner->nkeys); - if (test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) { + if (key->state != KEY_IS_UNINSTANTIATED) { atomic_dec(&key->user->nikeys); atomic_inc(&newowner->nikeys); } diff --git a/security/keys/keyring.c b/security/keys/keyring.c index c91e4e0cea08..32969f630438 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -407,7 +407,7 @@ static void keyring_describe(const struct key *keyring, struct seq_file *m) else seq_puts(m, "[anon]"); - if (key_is_instantiated(keyring)) { + if (key_is_positive(keyring)) { if (keyring->keys.nr_leaves_on_tree != 0) seq_printf(m, ": %lu", keyring->keys.nr_leaves_on_tree); else @@ -416,7 +416,7 @@ static void keyring_describe(const struct key *keyring, struct seq_file *m) } struct keyring_read_iterator_context { - size_t qty; + size_t buflen; size_t count; key_serial_t __user *buffer; }; @@ -428,9 +428,9 @@ static int keyring_read_iterator(const void *object, void *data) int ret; kenter("{%s,%d},,{%zu/%zu}", - key->type->name, key->serial, ctx->count, ctx->qty); + key->type->name, key->serial, ctx->count, ctx->buflen); - if (ctx->count >= ctx->qty) + if (ctx->count >= ctx->buflen) return 1; ret = put_user(key->serial, ctx->buffer); @@ -465,16 +465,12 @@ static long keyring_read(const struct key *keyring, return 0; /* Calculate how much data we could return */ - ctx.qty = nr_keys * sizeof(key_serial_t); - if (!buffer || !buflen) - return ctx.qty; - - if (buflen > ctx.qty) - ctx.qty = buflen; + return nr_keys * sizeof(key_serial_t); /* Copy the IDs of the subscribed keys into the buffer */ ctx.buffer = (key_serial_t __user *)buffer; + ctx.buflen = buflen; ctx.count = 0; ret = assoc_array_iterate(&keyring->keys, keyring_read_iterator, &ctx); if (ret < 0) { @@ -550,7 +546,8 @@ static int keyring_search_iterator(const void *object, void *iterator_data) { struct keyring_search_context *ctx = iterator_data; const struct key *key = keyring_ptr_to_key(object); - unsigned long kflags = key->flags; + unsigned long kflags = READ_ONCE(key->flags); + short state = READ_ONCE(key->state); kenter("{%d}", key->serial); @@ -594,9 +591,8 @@ static int keyring_search_iterator(const void *object, void *iterator_data) if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) { /* we set a different error code if we pass a negative key */ - if (kflags & (1 << KEY_FLAG_NEGATIVE)) { - smp_rmb(); - ctx->result = ERR_PTR(key->reject_error); + if (state < 0) { + ctx->result = ERR_PTR(state); kleave(" = %d [neg]", ctx->skipped_ret); goto skipped; } @@ -989,15 +985,15 @@ found: /* * Find a keyring with the specified name. * - * All named keyrings in the current user namespace are searched, provided they - * grant Search permission directly to the caller (unless this check is - * skipped). Keyrings whose usage points have reached zero or who have been - * revoked are skipped. + * Only keyrings that have nonzero refcount, are not revoked, and are owned by a + * user in the current user namespace are considered. If @uid_keyring is %true, + * the keyring additionally must have been allocated as a user or user session + * keyring; otherwise, it must grant Search permission directly to the caller. * * Returns a pointer to the keyring with the keyring's refcount having being * incremented on success. -ENOKEY is returned if a key could not be found. */ -struct key *find_keyring_by_name(const char *name, bool skip_perm_check) +struct key *find_keyring_by_name(const char *name, bool uid_keyring) { struct key *keyring; int bucket; @@ -1025,10 +1021,15 @@ struct key *find_keyring_by_name(const char *name, bool skip_perm_check) if (strcmp(keyring->description, name) != 0) continue; - if (!skip_perm_check && - key_permission(make_key_ref(keyring, 0), - KEY_NEED_SEARCH) < 0) - continue; + if (uid_keyring) { + if (!test_bit(KEY_FLAG_UID_KEYRING, + &keyring->flags)) + continue; + } else { + if (key_permission(make_key_ref(keyring, 0), + KEY_NEED_SEARCH) < 0) + continue; + } /* we've got a match but we might end up racing with * key_cleanup() if the keyring is currently 'dead' diff --git a/security/keys/proc.c b/security/keys/proc.c index b9f531c9e4fa..036128682463 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -182,6 +182,7 @@ static int proc_keys_show(struct seq_file *m, void *v) unsigned long timo; key_ref_t key_ref, skey_ref; char xbuf[16]; + short state; int rc; struct keyring_search_context ctx = { @@ -240,17 +241,19 @@ static int proc_keys_show(struct seq_file *m, void *v) sprintf(xbuf, "%luw", timo / (60*60*24*7)); } + state = key_read_state(key); + #define showflag(KEY, LETTER, FLAG) \ (test_bit(FLAG, &(KEY)->flags) ? LETTER : '-') seq_printf(m, "%08x %c%c%c%c%c%c%c %5d %4s %08x %5d %5d %-9.9s ", key->serial, - showflag(key, 'I', KEY_FLAG_INSTANTIATED), + state != KEY_IS_UNINSTANTIATED ? 'I' : '-', showflag(key, 'R', KEY_FLAG_REVOKED), showflag(key, 'D', KEY_FLAG_DEAD), showflag(key, 'Q', KEY_FLAG_IN_QUOTA), showflag(key, 'U', KEY_FLAG_USER_CONSTRUCT), - showflag(key, 'N', KEY_FLAG_NEGATIVE), + state < 0 ? 'N' : '-', showflag(key, 'i', KEY_FLAG_INVALIDATED), atomic_read(&key->usage), xbuf, diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 45536c677b05..2d35d71d7d9a 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -76,7 +76,8 @@ int install_user_keyrings(void) if (IS_ERR(uid_keyring)) { uid_keyring = keyring_alloc(buf, user->uid, INVALID_GID, cred, user_keyring_perm, - KEY_ALLOC_IN_QUOTA, + KEY_ALLOC_UID_KEYRING | + KEY_ALLOC_IN_QUOTA, NULL, NULL); if (IS_ERR(uid_keyring)) { ret = PTR_ERR(uid_keyring); @@ -93,7 +94,8 @@ int install_user_keyrings(void) session_keyring = keyring_alloc(buf, user->uid, INVALID_GID, cred, user_keyring_perm, - KEY_ALLOC_IN_QUOTA, + KEY_ALLOC_UID_KEYRING | + KEY_ALLOC_IN_QUOTA, NULL, NULL); if (IS_ERR(session_keyring)) { ret = PTR_ERR(session_keyring); @@ -727,7 +729,7 @@ try_again: ret = -EIO; if (!(lflags & KEY_LOOKUP_PARTIAL) && - !test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) + key_read_state(key) == KEY_IS_UNINSTANTIATED) goto invalid_key; /* check the permissions */ diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 43affcf10b22..5030fcf23681 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -594,10 +594,9 @@ int wait_for_key_construction(struct key *key, bool intr) intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); if (ret) return -ERESTARTSYS; - if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) { - smp_rmb(); - return key->reject_error; - } + ret = key_read_state(key); + if (ret < 0) + return ret; return key_validate(key); } EXPORT_SYMBOL(wait_for_key_construction); diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 9db8b4a82787..ba74a0b4d1cb 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -73,7 +73,7 @@ static void request_key_auth_describe(const struct key *key, seq_puts(m, "key:"); seq_puts(m, key->description); - if (key_is_instantiated(key)) + if (key_is_positive(key)) seq_printf(m, " pid:%d ci:%zu", rka->pid, rka->callout_len); } diff --git a/security/keys/trusted.c b/security/keys/trusted.c index 90d61751ff12..2a82fba7e5c1 100644 --- a/security/keys/trusted.c +++ b/security/keys/trusted.c @@ -1067,7 +1067,7 @@ static int trusted_update(struct key *key, struct key_preparsed_payload *prep) char *datablob; int ret = 0; - if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) + if (key_is_negative(key)) return -ENOKEY; p = key->payload.data[0]; if (!p->migratable) @@ -1140,12 +1140,12 @@ out: static long trusted_read(const struct key *key, char __user *buffer, size_t buflen) { - struct trusted_key_payload *p; + const struct trusted_key_payload *p; char *ascii_buf; char *bufp; int i; - p = rcu_dereference_key(key); + p = dereference_key_locked(key); if (!p) return -EINVAL; if (!buffer || buflen <= 0) diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c index 66b1840b4110..b4c170af7532 100644 --- a/security/keys/user_defined.c +++ b/security/keys/user_defined.c @@ -106,8 +106,8 @@ int user_update(struct key *key, struct key_preparsed_payload *prep) /* attach the new data, displacing the old */ key->expiry = prep->expiry; - if (!test_bit(KEY_FLAG_NEGATIVE, &key->flags)) - zap = rcu_dereference_key(key); + if (key_is_positive(key)) + zap = dereference_key_locked(key); rcu_assign_keypointer(key, prep->payload.data[0]); prep->payload.data[0] = NULL; @@ -123,7 +123,7 @@ EXPORT_SYMBOL_GPL(user_update); */ void user_revoke(struct key *key) { - struct user_key_payload *upayload = key->payload.data[0]; + struct user_key_payload *upayload = user_key_payload_locked(key); /* clear the quota */ key_payload_reserve(key, 0); @@ -154,7 +154,7 @@ EXPORT_SYMBOL_GPL(user_destroy); void user_describe(const struct key *key, struct seq_file *m) { seq_puts(m, key->description); - if (key_is_instantiated(key)) + if (key_is_positive(key)) seq_printf(m, ": %u", key->datalen); } @@ -169,7 +169,7 @@ long user_read(const struct key *key, char __user *buffer, size_t buflen) const struct user_key_payload *upayload; long ret; - upayload = user_key_payload(key); + upayload = user_key_payload_locked(key); ret = upayload->datalen; /* we can return the data as is */ diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 1cb060293505..a8a7fbc377f6 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1486,7 +1486,7 @@ static int smack_inode_removexattr(struct dentry *dentry, const char *name) * @inode: the object * @name: attribute name * @buffer: where to put the result - * @alloc: unused + * @alloc: duplicate memory * * Returns the size of the attribute or an error code */ @@ -1499,43 +1499,38 @@ static int smack_inode_getsecurity(struct inode *inode, struct super_block *sbp; struct inode *ip = (struct inode *)inode; struct smack_known *isp; - int ilen; - int rc = 0; - if (strcmp(name, XATTR_SMACK_SUFFIX) == 0) { + if (strcmp(name, XATTR_SMACK_SUFFIX) == 0) isp = smk_of_inode(inode); - ilen = strlen(isp->smk_known); - *buffer = isp->smk_known; - return ilen; + else { + /* + * The rest of the Smack xattrs are only on sockets. + */ + sbp = ip->i_sb; + if (sbp->s_magic != SOCKFS_MAGIC) + return -EOPNOTSUPP; + + sock = SOCKET_I(ip); + if (sock == NULL || sock->sk == NULL) + return -EOPNOTSUPP; + + ssp = sock->sk->sk_security; + + if (strcmp(name, XATTR_SMACK_IPIN) == 0) + isp = ssp->smk_in; + else if (strcmp(name, XATTR_SMACK_IPOUT) == 0) + isp = ssp->smk_out; + else + return -EOPNOTSUPP; } - /* - * The rest of the Smack xattrs are only on sockets. - */ - sbp = ip->i_sb; - if (sbp->s_magic != SOCKFS_MAGIC) - return -EOPNOTSUPP; - - sock = SOCKET_I(ip); - if (sock == NULL || sock->sk == NULL) - return -EOPNOTSUPP; - - ssp = sock->sk->sk_security; - - if (strcmp(name, XATTR_SMACK_IPIN) == 0) - isp = ssp->smk_in; - else if (strcmp(name, XATTR_SMACK_IPOUT) == 0) - isp = ssp->smk_out; - else - return -EOPNOTSUPP; - - ilen = strlen(isp->smk_known); - if (rc == 0) { - *buffer = isp->smk_known; - rc = ilen; + if (alloc) { + *buffer = kstrdup(isp->smk_known, GFP_KERNEL); + if (*buffer == NULL) + return -ENOMEM; } - return rc; + return strlen(isp->smk_known); } diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index fec1dfdb14ad..4490a699030b 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -948,14 +948,13 @@ static const struct file_operations snd_compr_file_ops = { static int snd_compress_dev_register(struct snd_device *device) { int ret = -EINVAL; - char str[16]; struct snd_compr *compr; if (snd_BUG_ON(!device || !device->device_data)) return -EBADFD; compr = device->device_data; - pr_debug("reg %s for device %s, direction %d\n", str, compr->name, + pr_debug("reg device %s, direction %d\n", compr->name, compr->direction); /* register compressed device */ ret = snd_register_device(SNDRV_DEVICE_TYPE_COMPRESS, diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 67c4c68ce041..c41148353e19 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -1259,6 +1259,7 @@ static int snd_seq_ioctl_create_port(struct snd_seq_client *client, void *arg) struct snd_seq_port_info *info = arg; struct snd_seq_client_port *port; struct snd_seq_port_callback *callback; + int port_idx; /* it is not allowed to create the port for an another client */ if (info->addr.client != client->number) @@ -1269,7 +1270,9 @@ static int snd_seq_ioctl_create_port(struct snd_seq_client *client, void *arg) return -ENOMEM; if (client->type == USER_CLIENT && info->kernel) { - snd_seq_delete_port(client, port->addr.port); + port_idx = port->addr.port; + snd_seq_port_unlock(port); + snd_seq_delete_port(client, port_idx); return -EINVAL; } if (client->type == KERNEL_CLIENT) { @@ -1290,6 +1293,7 @@ static int snd_seq_ioctl_create_port(struct snd_seq_client *client, void *arg) snd_seq_set_port_info(port, info); snd_seq_system_client_ev_port_start(port->addr.client, port->addr.port); + snd_seq_port_unlock(port); return 0; } diff --git a/sound/core/seq/seq_lock.c b/sound/core/seq/seq_lock.c index 12ba83367b1b..ba5752ee9af3 100644 --- a/sound/core/seq/seq_lock.c +++ b/sound/core/seq/seq_lock.c @@ -23,8 +23,6 @@ #include #include "seq_lock.h" -#if defined(CONFIG_SMP) || defined(CONFIG_SND_DEBUG) - /* wait until all locks are released */ void snd_use_lock_sync_helper(snd_use_lock_t *lockp, const char *file, int line) { @@ -42,5 +40,3 @@ void snd_use_lock_sync_helper(snd_use_lock_t *lockp, const char *file, int line) } EXPORT_SYMBOL(snd_use_lock_sync_helper); - -#endif diff --git a/sound/core/seq/seq_lock.h b/sound/core/seq/seq_lock.h index 54044bc2c9ef..ac38031c370e 100644 --- a/sound/core/seq/seq_lock.h +++ b/sound/core/seq/seq_lock.h @@ -3,8 +3,6 @@ #include -#if defined(CONFIG_SMP) || defined(CONFIG_SND_DEBUG) - typedef atomic_t snd_use_lock_t; /* initialize lock */ @@ -20,14 +18,4 @@ typedef atomic_t snd_use_lock_t; void snd_use_lock_sync_helper(snd_use_lock_t *lock, const char *file, int line); #define snd_use_lock_sync(lockp) snd_use_lock_sync_helper(lockp, __BASE_FILE__, __LINE__) -#else /* SMP || CONFIG_SND_DEBUG */ - -typedef spinlock_t snd_use_lock_t; /* dummy */ -#define snd_use_lock_init(lockp) /**/ -#define snd_use_lock_use(lockp) /**/ -#define snd_use_lock_free(lockp) /**/ -#define snd_use_lock_sync(lockp) /**/ - -#endif /* SMP || CONFIG_SND_DEBUG */ - #endif /* __SND_SEQ_LOCK_H */ diff --git a/sound/core/seq/seq_ports.c b/sound/core/seq/seq_ports.c index fe686ee41c6d..f04714d70bf7 100644 --- a/sound/core/seq/seq_ports.c +++ b/sound/core/seq/seq_ports.c @@ -122,7 +122,9 @@ static void port_subs_info_init(struct snd_seq_port_subs_info *grp) } -/* create a port, port number is returned (-1 on failure) */ +/* create a port, port number is returned (-1 on failure); + * the caller needs to unref the port via snd_seq_port_unlock() appropriately + */ struct snd_seq_client_port *snd_seq_create_port(struct snd_seq_client *client, int port) { @@ -151,6 +153,7 @@ struct snd_seq_client_port *snd_seq_create_port(struct snd_seq_client *client, snd_use_lock_init(&new_port->use_lock); port_subs_info_init(&new_port->c_src); port_subs_info_init(&new_port->c_dest); + snd_use_lock_use(&new_port->use_lock); num = port >= 0 ? port : 0; mutex_lock(&client->ports_mutex); @@ -165,9 +168,9 @@ struct snd_seq_client_port *snd_seq_create_port(struct snd_seq_client *client, list_add_tail(&new_port->list, &p->list); client->num_ports++; new_port->addr.port = num; /* store the port number in the port */ + sprintf(new_port->name, "port-%d", num); write_unlock_irqrestore(&client->ports_lock, flags); mutex_unlock(&client->ports_mutex); - sprintf(new_port->name, "port-%d", num); return new_port; } diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c index c82ed3e70506..200764948ed1 100644 --- a/sound/core/seq/seq_virmidi.c +++ b/sound/core/seq/seq_virmidi.c @@ -77,13 +77,17 @@ static void snd_virmidi_init_event(struct snd_virmidi *vmidi, * decode input event and put to read buffer of each opened file */ static int snd_virmidi_dev_receive_event(struct snd_virmidi_dev *rdev, - struct snd_seq_event *ev) + struct snd_seq_event *ev, + bool atomic) { struct snd_virmidi *vmidi; unsigned char msg[4]; int len; - read_lock(&rdev->filelist_lock); + if (atomic) + read_lock(&rdev->filelist_lock); + else + down_read(&rdev->filelist_sem); list_for_each_entry(vmidi, &rdev->filelist, list) { if (!vmidi->trigger) continue; @@ -97,7 +101,10 @@ static int snd_virmidi_dev_receive_event(struct snd_virmidi_dev *rdev, snd_rawmidi_receive(vmidi->substream, msg, len); } } - read_unlock(&rdev->filelist_lock); + if (atomic) + read_unlock(&rdev->filelist_lock); + else + up_read(&rdev->filelist_sem); return 0; } @@ -115,7 +122,7 @@ int snd_virmidi_receive(struct snd_rawmidi *rmidi, struct snd_seq_event *ev) struct snd_virmidi_dev *rdev; rdev = rmidi->private_data; - return snd_virmidi_dev_receive_event(rdev, ev); + return snd_virmidi_dev_receive_event(rdev, ev, true); } #endif /* 0 */ @@ -130,7 +137,7 @@ static int snd_virmidi_event_input(struct snd_seq_event *ev, int direct, rdev = private_data; if (!(rdev->flags & SNDRV_VIRMIDI_USE)) return 0; /* ignored */ - return snd_virmidi_dev_receive_event(rdev, ev); + return snd_virmidi_dev_receive_event(rdev, ev, atomic); } /* @@ -209,7 +216,6 @@ static int snd_virmidi_input_open(struct snd_rawmidi_substream *substream) struct snd_virmidi_dev *rdev = substream->rmidi->private_data; struct snd_rawmidi_runtime *runtime = substream->runtime; struct snd_virmidi *vmidi; - unsigned long flags; vmidi = kzalloc(sizeof(*vmidi), GFP_KERNEL); if (vmidi == NULL) @@ -223,9 +229,11 @@ static int snd_virmidi_input_open(struct snd_rawmidi_substream *substream) vmidi->client = rdev->client; vmidi->port = rdev->port; runtime->private_data = vmidi; - write_lock_irqsave(&rdev->filelist_lock, flags); + down_write(&rdev->filelist_sem); + write_lock_irq(&rdev->filelist_lock); list_add_tail(&vmidi->list, &rdev->filelist); - write_unlock_irqrestore(&rdev->filelist_lock, flags); + write_unlock_irq(&rdev->filelist_lock); + up_write(&rdev->filelist_sem); vmidi->rdev = rdev; return 0; } @@ -264,9 +272,11 @@ static int snd_virmidi_input_close(struct snd_rawmidi_substream *substream) struct snd_virmidi_dev *rdev = substream->rmidi->private_data; struct snd_virmidi *vmidi = substream->runtime->private_data; + down_write(&rdev->filelist_sem); write_lock_irq(&rdev->filelist_lock); list_del(&vmidi->list); write_unlock_irq(&rdev->filelist_lock); + up_write(&rdev->filelist_sem); snd_midi_event_free(vmidi->parser); substream->runtime->private_data = NULL; kfree(vmidi); @@ -520,6 +530,7 @@ int snd_virmidi_new(struct snd_card *card, int device, struct snd_rawmidi **rrmi rdev->rmidi = rmidi; rdev->device = device; rdev->client = -1; + init_rwsem(&rdev->filelist_sem); rwlock_init(&rdev->filelist_lock); INIT_LIST_HEAD(&rdev->filelist); rdev->seq_mode = SNDRV_VIRMIDI_SEQ_DISPATCH; diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c index 0f41257d339e..8761877207ec 100644 --- a/sound/hda/hdac_controller.c +++ b/sound/hda/hdac_controller.c @@ -284,6 +284,11 @@ int snd_hdac_bus_parse_capabilities(struct hdac_bus *bus) dev_dbg(bus->dev, "HDA capability ID: 0x%x\n", (cur_cap & AZX_CAP_HDR_ID_MASK) >> AZX_CAP_HDR_ID_OFF); + if (cur_cap == -1) { + dev_dbg(bus->dev, "Invalid capability reg read\n"); + break; + } + switch ((cur_cap & AZX_CAP_HDR_ID_MASK) >> AZX_CAP_HDR_ID_OFF) { case AZX_ML_CAP_ID: dev_dbg(bus->dev, "Found ML capability\n"); diff --git a/sound/pci/au88x0/au88x0_core.c b/sound/pci/au88x0/au88x0_core.c index e1af24f87566..c308a4f70550 100644 --- a/sound/pci/au88x0/au88x0_core.c +++ b/sound/pci/au88x0/au88x0_core.c @@ -2279,6 +2279,9 @@ vortex_adb_allocroute(vortex_t *vortex, int dma, int nr_ch, int dir, } else { int src[2], mix[2]; + if (nr_ch < 1) + return -EINVAL; + /* Get SRC and MIXER hardware resources. */ for (i = 0; i < nr_ch; i++) { if ((mix[i] = diff --git a/sound/pci/echoaudio/echoaudio.c b/sound/pci/echoaudio/echoaudio.c index 937071760bc4..286f5e3686a3 100644 --- a/sound/pci/echoaudio/echoaudio.c +++ b/sound/pci/echoaudio/echoaudio.c @@ -1272,11 +1272,11 @@ static int snd_echo_mixer_info(struct snd_kcontrol *kcontrol, chip = snd_kcontrol_chip(kcontrol); uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; + uinfo->count = 1; uinfo->value.integer.min = ECHOGAIN_MINOUT; uinfo->value.integer.max = ECHOGAIN_MAXOUT; uinfo->dimen.d[0] = num_busses_out(chip); uinfo->dimen.d[1] = num_busses_in(chip); - uinfo->count = uinfo->dimen.d[0] * uinfo->dimen.d[1]; return 0; } @@ -1344,11 +1344,11 @@ static int snd_echo_vmixer_info(struct snd_kcontrol *kcontrol, chip = snd_kcontrol_chip(kcontrol); uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; + uinfo->count = 1; uinfo->value.integer.min = ECHOGAIN_MINOUT; uinfo->value.integer.max = ECHOGAIN_MAXOUT; uinfo->dimen.d[0] = num_busses_out(chip); uinfo->dimen.d[1] = num_pipes_out(chip); - uinfo->count = uinfo->dimen.d[0] * uinfo->dimen.d[1]; return 0; } @@ -1728,6 +1728,7 @@ static int snd_echo_vumeters_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; + uinfo->count = 96; uinfo->value.integer.min = ECHOGAIN_MINOUT; uinfo->value.integer.max = 0; #ifdef ECHOCARD_HAS_VMIXER @@ -1737,7 +1738,6 @@ static int snd_echo_vumeters_info(struct snd_kcontrol *kcontrol, #endif uinfo->dimen.d[1] = 16; /* 16 channels */ uinfo->dimen.d[2] = 2; /* 0=level, 1=peak */ - uinfo->count = uinfo->dimen.d[0] * uinfo->dimen.d[1] * uinfo->dimen.d[2]; return 0; } diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 9913be8532ab..e46c561a8c90 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -1755,7 +1755,7 @@ static int get_kctl_0dB_offset(struct hda_codec *codec, return -1; if (*step_to_check && *step_to_check != step) { codec_err(codec, "Mismatching dB step for vmaster slave (%d!=%d)\n", -- *step_to_check, step); + *step_to_check, step); return -1; } *step_to_check = step; diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 775c67818bf1..bd650222e711 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -3685,6 +3685,7 @@ HDA_CODEC_ENTRY(0x80862808, "Broadwell HDMI", patch_i915_hsw_hdmi), HDA_CODEC_ENTRY(0x80862809, "Skylake HDMI", patch_i915_hsw_hdmi), HDA_CODEC_ENTRY(0x8086280a, "Broxton HDMI", patch_i915_hsw_hdmi), HDA_CODEC_ENTRY(0x8086280b, "Kabylake HDMI", patch_i915_hsw_hdmi), +HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI", patch_i915_hsw_hdmi), HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi), HDA_CODEC_ENTRY(0x80862882, "Valleyview2 HDMI", patch_i915_byt_hdmi), HDA_CODEC_ENTRY(0x80862883, "Braswell HDMI", patch_i915_byt_hdmi), diff --git a/sound/soc/codecs/rt5514.c b/sound/soc/codecs/rt5514.c index f24b7cfd3a89..e024800213f5 100644 --- a/sound/soc/codecs/rt5514.c +++ b/sound/soc/codecs/rt5514.c @@ -395,14 +395,14 @@ static const char * const rt5514_dmic_src[] = { "DMIC1", "DMIC2" }; -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5514_stereo1_dmic_enum, RT5514_DIG_SOURCE_CTRL, RT5514_AD0_DMIC_INPUT_SEL_SFT, rt5514_dmic_src); static const struct snd_kcontrol_new rt5514_sto1_dmic_mux = SOC_DAPM_ENUM("Stereo1 DMIC Source", rt5514_stereo1_dmic_enum); -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5514_stereo2_dmic_enum, RT5514_DIG_SOURCE_CTRL, RT5514_AD1_DMIC_INPUT_SEL_SFT, rt5514_dmic_src); diff --git a/sound/soc/codecs/rt5659.c b/sound/soc/codecs/rt5659.c index db54550aed60..635818fcda00 100644 --- a/sound/soc/codecs/rt5659.c +++ b/sound/soc/codecs/rt5659.c @@ -1150,28 +1150,28 @@ static const char * const rt5659_data_select[] = { "L/R", "R/L", "L/L", "R/R" }; -static const SOC_ENUM_SINGLE_DECL(rt5659_if1_01_adc_enum, +static SOC_ENUM_SINGLE_DECL(rt5659_if1_01_adc_enum, RT5659_TDM_CTRL_2, RT5659_DS_ADC_SLOT01_SFT, rt5659_data_select); -static const SOC_ENUM_SINGLE_DECL(rt5659_if1_23_adc_enum, +static SOC_ENUM_SINGLE_DECL(rt5659_if1_23_adc_enum, RT5659_TDM_CTRL_2, RT5659_DS_ADC_SLOT23_SFT, rt5659_data_select); -static const SOC_ENUM_SINGLE_DECL(rt5659_if1_45_adc_enum, +static SOC_ENUM_SINGLE_DECL(rt5659_if1_45_adc_enum, RT5659_TDM_CTRL_2, RT5659_DS_ADC_SLOT45_SFT, rt5659_data_select); -static const SOC_ENUM_SINGLE_DECL(rt5659_if1_67_adc_enum, +static SOC_ENUM_SINGLE_DECL(rt5659_if1_67_adc_enum, RT5659_TDM_CTRL_2, RT5659_DS_ADC_SLOT67_SFT, rt5659_data_select); -static const SOC_ENUM_SINGLE_DECL(rt5659_if2_dac_enum, +static SOC_ENUM_SINGLE_DECL(rt5659_if2_dac_enum, RT5659_DIG_INF23_DATA, RT5659_IF2_DAC_SEL_SFT, rt5659_data_select); -static const SOC_ENUM_SINGLE_DECL(rt5659_if2_adc_enum, +static SOC_ENUM_SINGLE_DECL(rt5659_if2_adc_enum, RT5659_DIG_INF23_DATA, RT5659_IF2_ADC_SEL_SFT, rt5659_data_select); -static const SOC_ENUM_SINGLE_DECL(rt5659_if3_dac_enum, +static SOC_ENUM_SINGLE_DECL(rt5659_if3_dac_enum, RT5659_DIG_INF23_DATA, RT5659_IF3_DAC_SEL_SFT, rt5659_data_select); -static const SOC_ENUM_SINGLE_DECL(rt5659_if3_adc_enum, +static SOC_ENUM_SINGLE_DECL(rt5659_if3_adc_enum, RT5659_DIG_INF23_DATA, RT5659_IF3_ADC_SEL_SFT, rt5659_data_select); static const struct snd_kcontrol_new rt5659_if1_01_adc_swap_mux = @@ -1207,31 +1207,31 @@ static unsigned int rt5659_asrc_clk_map_values[] = { 0, 1, 2, 3, 5, 6, }; -static const SOC_VALUE_ENUM_SINGLE_DECL( +static SOC_VALUE_ENUM_SINGLE_DECL( rt5659_da_sto_asrc_enum, RT5659_ASRC_2, RT5659_DA_STO_T_SFT, 0x7, rt5659_asrc_clk_src, rt5659_asrc_clk_map_values); -static const SOC_VALUE_ENUM_SINGLE_DECL( +static SOC_VALUE_ENUM_SINGLE_DECL( rt5659_da_monol_asrc_enum, RT5659_ASRC_2, RT5659_DA_MONO_L_T_SFT, 0x7, rt5659_asrc_clk_src, rt5659_asrc_clk_map_values); -static const SOC_VALUE_ENUM_SINGLE_DECL( +static SOC_VALUE_ENUM_SINGLE_DECL( rt5659_da_monor_asrc_enum, RT5659_ASRC_2, RT5659_DA_MONO_R_T_SFT, 0x7, rt5659_asrc_clk_src, rt5659_asrc_clk_map_values); -static const SOC_VALUE_ENUM_SINGLE_DECL( +static SOC_VALUE_ENUM_SINGLE_DECL( rt5659_ad_sto1_asrc_enum, RT5659_ASRC_2, RT5659_AD_STO1_T_SFT, 0x7, rt5659_asrc_clk_src, rt5659_asrc_clk_map_values); -static const SOC_VALUE_ENUM_SINGLE_DECL( +static SOC_VALUE_ENUM_SINGLE_DECL( rt5659_ad_sto2_asrc_enum, RT5659_ASRC_3, RT5659_AD_STO2_T_SFT, 0x7, rt5659_asrc_clk_src, rt5659_asrc_clk_map_values); -static const SOC_VALUE_ENUM_SINGLE_DECL( +static SOC_VALUE_ENUM_SINGLE_DECL( rt5659_ad_monol_asrc_enum, RT5659_ASRC_3, RT5659_AD_MONO_L_T_SFT, 0x7, rt5659_asrc_clk_src, rt5659_asrc_clk_map_values); -static const SOC_VALUE_ENUM_SINGLE_DECL( +static SOC_VALUE_ENUM_SINGLE_DECL( rt5659_ad_monor_asrc_enum, RT5659_ASRC_3, RT5659_AD_MONO_R_T_SFT, 0x7, rt5659_asrc_clk_src, rt5659_asrc_clk_map_values); @@ -1930,14 +1930,14 @@ static const char * const rt5659_dac2_src[] = { "IF1 DAC2", "IF2 DAC", "IF3 DAC", "Mono ADC MIX" }; -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_dac_l2_enum, RT5659_DAC_CTRL, RT5659_DAC_L2_SEL_SFT, rt5659_dac2_src); static const struct snd_kcontrol_new rt5659_dac_l2_mux = SOC_DAPM_ENUM("DAC L2 Source", rt5659_dac_l2_enum); -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_dac_r2_enum, RT5659_DAC_CTRL, RT5659_DAC_R2_SEL_SFT, rt5659_dac2_src); @@ -1951,7 +1951,7 @@ static const char * const rt5659_sto1_adc1_src[] = { "DAC MIX", "ADC" }; -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_sto1_adc1_enum, RT5659_STO1_ADC_MIXER, RT5659_STO1_ADC1_SRC_SFT, rt5659_sto1_adc1_src); @@ -1964,7 +1964,7 @@ static const char * const rt5659_sto1_adc_src[] = { "ADC1", "ADC2" }; -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_sto1_adc_enum, RT5659_STO1_ADC_MIXER, RT5659_STO1_ADC_SRC_SFT, rt5659_sto1_adc_src); @@ -1977,7 +1977,7 @@ static const char * const rt5659_sto1_adc2_src[] = { "DAC MIX", "DMIC" }; -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_sto1_adc2_enum, RT5659_STO1_ADC_MIXER, RT5659_STO1_ADC2_SRC_SFT, rt5659_sto1_adc2_src); @@ -1990,7 +1990,7 @@ static const char * const rt5659_sto1_dmic_src[] = { "DMIC1", "DMIC2" }; -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_sto1_dmic_enum, RT5659_STO1_ADC_MIXER, RT5659_STO1_DMIC_SRC_SFT, rt5659_sto1_dmic_src); @@ -2004,7 +2004,7 @@ static const char * const rt5659_mono_adc_l2_src[] = { "Mono DAC MIXL", "DMIC" }; -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_mono_adc_l2_enum, RT5659_MONO_ADC_MIXER, RT5659_MONO_ADC_L2_SRC_SFT, rt5659_mono_adc_l2_src); @@ -2018,7 +2018,7 @@ static const char * const rt5659_mono_adc_l1_src[] = { "Mono DAC MIXL", "ADC" }; -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_mono_adc_l1_enum, RT5659_MONO_ADC_MIXER, RT5659_MONO_ADC_L1_SRC_SFT, rt5659_mono_adc_l1_src); @@ -2031,14 +2031,14 @@ static const char * const rt5659_mono_adc_src[] = { "ADC1 L", "ADC1 R", "ADC2 L", "ADC2 R" }; -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_mono_adc_l_enum, RT5659_MONO_ADC_MIXER, RT5659_MONO_ADC_L_SRC_SFT, rt5659_mono_adc_src); static const struct snd_kcontrol_new rt5659_mono_adc_l_mux = SOC_DAPM_ENUM("Mono ADC L Source", rt5659_mono_adc_l_enum); -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_mono_adcr_enum, RT5659_MONO_ADC_MIXER, RT5659_MONO_ADC_R_SRC_SFT, rt5659_mono_adc_src); @@ -2051,7 +2051,7 @@ static const char * const rt5659_mono_dmic_l_src[] = { "DMIC1 L", "DMIC2 L" }; -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_mono_dmic_l_enum, RT5659_MONO_ADC_MIXER, RT5659_MONO_DMIC_L_SRC_SFT, rt5659_mono_dmic_l_src); @@ -2064,7 +2064,7 @@ static const char * const rt5659_mono_adc_r2_src[] = { "Mono DAC MIXR", "DMIC" }; -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_mono_adc_r2_enum, RT5659_MONO_ADC_MIXER, RT5659_MONO_ADC_R2_SRC_SFT, rt5659_mono_adc_r2_src); @@ -2077,7 +2077,7 @@ static const char * const rt5659_mono_adc_r1_src[] = { "Mono DAC MIXR", "ADC" }; -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_mono_adc_r1_enum, RT5659_MONO_ADC_MIXER, RT5659_MONO_ADC_R1_SRC_SFT, rt5659_mono_adc_r1_src); @@ -2090,7 +2090,7 @@ static const char * const rt5659_mono_dmic_r_src[] = { "DMIC1 R", "DMIC2 R" }; -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_mono_dmic_r_enum, RT5659_MONO_ADC_MIXER, RT5659_MONO_DMIC_R_SRC_SFT, rt5659_mono_dmic_r_src); @@ -2104,14 +2104,14 @@ static const char * const rt5659_dac1_src[] = { "IF1 DAC1", "IF2 DAC", "IF3 DAC" }; -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_dac_r1_enum, RT5659_AD_DA_MIXER, RT5659_DAC1_R_SEL_SFT, rt5659_dac1_src); static const struct snd_kcontrol_new rt5659_dac_r1_mux = SOC_DAPM_ENUM("DAC R1 Source", rt5659_dac_r1_enum); -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_dac_l1_enum, RT5659_AD_DA_MIXER, RT5659_DAC1_L_SEL_SFT, rt5659_dac1_src); @@ -2124,14 +2124,14 @@ static const char * const rt5659_dig_dac_mix_src[] = { "Stereo DAC Mixer", "Mono DAC Mixer" }; -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_dig_dac_mixl_enum, RT5659_DIG_MIXER, RT5659_DAC_MIX_L_SFT, rt5659_dig_dac_mix_src); static const struct snd_kcontrol_new rt5659_dig_dac_mixl_mux = SOC_DAPM_ENUM("DAC Digital Mixer L Source", rt5659_dig_dac_mixl_enum); -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_dig_dac_mixr_enum, RT5659_DIG_MIXER, RT5659_DAC_MIX_R_SFT, rt5659_dig_dac_mix_src); @@ -2144,14 +2144,14 @@ static const char * const rt5659_alg_dac1_src[] = { "DAC", "Stereo DAC Mixer" }; -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_alg_dac_l1_enum, RT5659_A_DAC_MUX, RT5659_A_DACL1_SFT, rt5659_alg_dac1_src); static const struct snd_kcontrol_new rt5659_alg_dac_l1_mux = SOC_DAPM_ENUM("Analog DACL1 Source", rt5659_alg_dac_l1_enum); -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_alg_dac_r1_enum, RT5659_A_DAC_MUX, RT5659_A_DACR1_SFT, rt5659_alg_dac1_src); @@ -2164,14 +2164,14 @@ static const char * const rt5659_alg_dac2_src[] = { "Stereo DAC Mixer", "Mono DAC Mixer" }; -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_alg_dac_l2_enum, RT5659_A_DAC_MUX, RT5659_A_DACL2_SFT, rt5659_alg_dac2_src); static const struct snd_kcontrol_new rt5659_alg_dac_l2_mux = SOC_DAPM_ENUM("Analog DAC L2 Source", rt5659_alg_dac_l2_enum); -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_alg_dac_r2_enum, RT5659_A_DAC_MUX, RT5659_A_DACR2_SFT, rt5659_alg_dac2_src); @@ -2184,7 +2184,7 @@ static const char * const rt5659_if2_adc_in_src[] = { "IF_ADC1", "IF_ADC2", "DAC_REF", "IF_ADC3" }; -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_if2_adc_in_enum, RT5659_DIG_INF23_DATA, RT5659_IF2_ADC_IN_SFT, rt5659_if2_adc_in_src); @@ -2197,7 +2197,7 @@ static const char * const rt5659_if3_adc_in_src[] = { "IF_ADC1", "IF_ADC2", "DAC_REF", "Stereo2_ADC_L/R" }; -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_if3_adc_in_enum, RT5659_DIG_INF23_DATA, RT5659_IF3_ADC_IN_SFT, rt5659_if3_adc_in_src); @@ -2210,14 +2210,14 @@ static const char * const rt5659_pdm_src[] = { "Mono DAC", "Stereo DAC" }; -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_pdm_l_enum, RT5659_PDM_OUT_CTRL, RT5659_PDM1_L_SFT, rt5659_pdm_src); static const struct snd_kcontrol_new rt5659_pdm_l_mux = SOC_DAPM_ENUM("PDM L Source", rt5659_pdm_l_enum); -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_pdm_r_enum, RT5659_PDM_OUT_CTRL, RT5659_PDM1_R_SFT, rt5659_pdm_src); @@ -2230,7 +2230,7 @@ static const char * const rt5659_spdif_src[] = { "IF1_DAC1", "IF1_DAC2", "IF2_DAC", "IF3_DAC" }; -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_spdif_enum, RT5659_SPDIF_CTRL, RT5659_SPDIF_SEL_SFT, rt5659_spdif_src); @@ -2250,7 +2250,7 @@ static const char * const rt5659_rx_adc_data_src[] = { "NUL:AD2:DAC:AD1", "NUL:DAC:DAC:AD2", "NUL:DAC:AD2:DAC" }; -static const SOC_ENUM_SINGLE_DECL( +static SOC_ENUM_SINGLE_DECL( rt5659_rx_adc_data_enum, RT5659_TDM_CTRL_2, RT5659_ADCDAT_SRC_SFT, rt5659_rx_adc_data_src); diff --git a/sound/soc/codecs/rt5660.c b/sound/soc/codecs/rt5660.c index 9f0933ced804..e396b7680fa1 100644 --- a/sound/soc/codecs/rt5660.c +++ b/sound/soc/codecs/rt5660.c @@ -526,10 +526,10 @@ static const char * const rt5660_data_select[] = { "L/R", "R/L", "L/L", "R/R" }; -static const SOC_ENUM_SINGLE_DECL(rt5660_if1_dac_enum, +static SOC_ENUM_SINGLE_DECL(rt5660_if1_dac_enum, RT5660_DIG_INF1_DATA, RT5660_IF1_DAC_IN_SFT, rt5660_data_select); -static const SOC_ENUM_SINGLE_DECL(rt5660_if1_adc_enum, +static SOC_ENUM_SINGLE_DECL(rt5660_if1_adc_enum, RT5660_DIG_INF1_DATA, RT5660_IF1_ADC_IN_SFT, rt5660_data_select); static const struct snd_kcontrol_new rt5660_if1_dac_swap_mux = diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index b943dde8dbe5..3bdd81930486 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -789,7 +789,10 @@ static int wm_coeff_put(struct snd_kcontrol *kctl, mutex_lock(&ctl->dsp->pwr_lock); - memcpy(ctl->cache, p, ctl->len); + if (ctl->flags & WMFW_CTL_FLAG_VOLATILE) + ret = -EPERM; + else + memcpy(ctl->cache, p, ctl->len); ctl->set = 1; if (ctl->enabled && ctl->dsp->running) @@ -816,6 +819,8 @@ static int wm_coeff_tlv_put(struct snd_kcontrol *kctl, ctl->set = 1; if (ctl->enabled && ctl->dsp->running) ret = wm_coeff_write_control(ctl, ctl->cache, size); + else if (ctl->flags & WMFW_CTL_FLAG_VOLATILE) + ret = -EPERM; } mutex_unlock(&ctl->dsp->pwr_lock); diff --git a/sound/soc/mediatek/Kconfig b/sound/soc/mediatek/Kconfig index 05cf809cf9e1..d7013bde6f45 100644 --- a/sound/soc/mediatek/Kconfig +++ b/sound/soc/mediatek/Kconfig @@ -13,7 +13,7 @@ config SND_SOC_MT2701 config SND_SOC_MT2701_CS42448 tristate "ASoc Audio driver for MT2701 with CS42448 codec" - depends on SND_SOC_MT2701 + depends on SND_SOC_MT2701 && I2C select SND_SOC_CS42XX8_I2C select SND_SOC_BT_SCO help diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 3bbe32ee4630..6780eba55ec2 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -358,6 +358,10 @@ static int dapm_kcontrol_data_alloc(struct snd_soc_dapm_widget *widget, snd_soc_dapm_new_control_unlocked(widget->dapm, &template); kfree(name); + if (IS_ERR(data->widget)) { + ret = PTR_ERR(data->widget); + goto err_data; + } if (!data->widget) { ret = -ENOMEM; goto err_data; @@ -392,6 +396,10 @@ static int dapm_kcontrol_data_alloc(struct snd_soc_dapm_widget *widget, data->widget = snd_soc_dapm_new_control_unlocked( widget->dapm, &template); kfree(name); + if (IS_ERR(data->widget)) { + ret = PTR_ERR(data->widget); + goto err_data; + } if (!data->widget) { ret = -ENOMEM; goto err_data; @@ -3311,11 +3319,22 @@ snd_soc_dapm_new_control(struct snd_soc_dapm_context *dapm, mutex_lock_nested(&dapm->card->dapm_mutex, SND_SOC_DAPM_CLASS_RUNTIME); w = snd_soc_dapm_new_control_unlocked(dapm, widget); + /* Do not nag about probe deferrals */ + if (IS_ERR(w)) { + int ret = PTR_ERR(w); + + if (ret != -EPROBE_DEFER) + dev_err(dapm->dev, + "ASoC: Failed to create DAPM control %s (%d)\n", + widget->name, ret); + goto out_unlock; + } if (!w) dev_err(dapm->dev, "ASoC: Failed to create DAPM control %s\n", widget->name); +out_unlock: mutex_unlock(&dapm->card->dapm_mutex); return w; } @@ -3338,6 +3357,8 @@ snd_soc_dapm_new_control_unlocked(struct snd_soc_dapm_context *dapm, w->regulator = devm_regulator_get(dapm->dev, w->name); if (IS_ERR(w->regulator)) { ret = PTR_ERR(w->regulator); + if (ret == -EPROBE_DEFER) + return ERR_PTR(ret); dev_err(dapm->dev, "ASoC: Failed to request %s: %d\n", w->name, ret); return NULL; @@ -3356,6 +3377,8 @@ snd_soc_dapm_new_control_unlocked(struct snd_soc_dapm_context *dapm, w->clk = devm_clk_get(dapm->dev, w->name); if (IS_ERR(w->clk)) { ret = PTR_ERR(w->clk); + if (ret == -EPROBE_DEFER) + return ERR_PTR(ret); dev_err(dapm->dev, "ASoC: Failed to request %s: %d\n", w->name, ret); return NULL; @@ -3474,6 +3497,16 @@ int snd_soc_dapm_new_controls(struct snd_soc_dapm_context *dapm, mutex_lock_nested(&dapm->card->dapm_mutex, SND_SOC_DAPM_CLASS_INIT); for (i = 0; i < num; i++) { w = snd_soc_dapm_new_control_unlocked(dapm, widget); + if (IS_ERR(w)) { + ret = PTR_ERR(w); + /* Do not nag about probe deferrals */ + if (ret == -EPROBE_DEFER) + break; + dev_err(dapm->dev, + "ASoC: Failed to create DAPM control %s (%d)\n", + widget->name, ret); + break; + } if (!w) { dev_err(dapm->dev, "ASoC: Failed to create DAPM control %s\n", @@ -3750,6 +3783,15 @@ int snd_soc_dapm_new_pcm(struct snd_soc_card *card, dev_dbg(card->dev, "ASoC: adding %s widget\n", link_name); w = snd_soc_dapm_new_control_unlocked(&card->dapm, &template); + if (IS_ERR(w)) { + ret = PTR_ERR(w); + /* Do not nag about probe deferrals */ + if (ret != -EPROBE_DEFER) + dev_err(card->dev, + "ASoC: Failed to create %s widget (%d)\n", + link_name, ret); + goto outfree_kcontrol_news; + } if (!w) { dev_err(card->dev, "ASoC: Failed to create %s widget\n", link_name); @@ -3801,6 +3843,16 @@ int snd_soc_dapm_new_dai_widgets(struct snd_soc_dapm_context *dapm, template.name); w = snd_soc_dapm_new_control_unlocked(dapm, &template); + if (IS_ERR(w)) { + int ret = PTR_ERR(w); + + /* Do not nag about probe deferrals */ + if (ret != -EPROBE_DEFER) + dev_err(dapm->dev, + "ASoC: Failed to create %s widget (%d)\n", + dai->driver->playback.stream_name, ret); + return ret; + } if (!w) { dev_err(dapm->dev, "ASoC: Failed to create %s widget\n", dai->driver->playback.stream_name); @@ -3820,6 +3872,16 @@ int snd_soc_dapm_new_dai_widgets(struct snd_soc_dapm_context *dapm, template.name); w = snd_soc_dapm_new_control_unlocked(dapm, &template); + if (IS_ERR(w)) { + int ret = PTR_ERR(w); + + /* Do not nag about probe deferrals */ + if (ret != -EPROBE_DEFER) + dev_err(dapm->dev, + "ASoC: Failed to create %s widget (%d)\n", + dai->driver->playback.stream_name, ret); + return ret; + } if (!w) { dev_err(dapm->dev, "ASoC: Failed to create %s widget\n", dai->driver->capture.stream_name); diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 6b05047a4134..8a758c994506 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -1473,6 +1473,15 @@ widget: widget = snd_soc_dapm_new_control(dapm, &template); else widget = snd_soc_dapm_new_control_unlocked(dapm, &template); + if (IS_ERR(widget)) { + ret = PTR_ERR(widget); + /* Do not nag about probe deferrals */ + if (ret != -EPROBE_DEFER) + dev_err(tplg->dev, + "ASoC: failed to create widget %s controls (%d)\n", + w->name, ret); + goto hdr_err; + } if (widget == NULL) { dev_err(tplg->dev, "ASoC: failed to create widget %s controls\n", w->name); diff --git a/sound/usb/caiaq/device.c b/sound/usb/caiaq/device.c index b871ba407e4e..4458190149d1 100644 --- a/sound/usb/caiaq/device.c +++ b/sound/usb/caiaq/device.c @@ -469,10 +469,12 @@ static int init_card(struct snd_usb_caiaqdev *cdev) err = snd_usb_caiaq_send_command(cdev, EP1_CMD_GET_DEVICE_INFO, NULL, 0); if (err) - return err; + goto err_kill_urb; - if (!wait_event_timeout(cdev->ep1_wait_queue, cdev->spec_received, HZ)) - return -ENODEV; + if (!wait_event_timeout(cdev->ep1_wait_queue, cdev->spec_received, HZ)) { + err = -ENODEV; + goto err_kill_urb; + } usb_string(usb_dev, usb_dev->descriptor.iManufacturer, cdev->vendor_name, CAIAQ_USB_STR_LEN); @@ -507,6 +509,10 @@ static int init_card(struct snd_usb_caiaqdev *cdev) setup_card(cdev); return 0; + + err_kill_urb: + usb_kill_urb(&cdev->ep1_in_urb); + return err; } static int snd_probe(struct usb_interface *intf, diff --git a/sound/usb/card.c b/sound/usb/card.c index f36cb068dad3..8906199a83e6 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -221,6 +221,7 @@ static int snd_usb_create_streams(struct snd_usb_audio *chip, int ctrlif) struct usb_interface_descriptor *altsd; void *control_header; int i, protocol; + int rest_bytes; /* find audiocontrol interface */ host_iface = &usb_ifnum_to_if(dev, ctrlif)->altsetting[0]; @@ -235,6 +236,15 @@ static int snd_usb_create_streams(struct snd_usb_audio *chip, int ctrlif) return -EINVAL; } + rest_bytes = (void *)(host_iface->extra + host_iface->extralen) - + control_header; + + /* just to be sure -- this shouldn't hit at all */ + if (rest_bytes <= 0) { + dev_err(&dev->dev, "invalid control header\n"); + return -EINVAL; + } + switch (protocol) { default: dev_warn(&dev->dev, @@ -245,11 +255,21 @@ static int snd_usb_create_streams(struct snd_usb_audio *chip, int ctrlif) case UAC_VERSION_1: { struct uac1_ac_header_descriptor *h1 = control_header; + if (rest_bytes < sizeof(*h1)) { + dev_err(&dev->dev, "too short v1 buffer descriptor\n"); + return -EINVAL; + } + if (!h1->bInCollection) { dev_info(&dev->dev, "skipping empty audio interface (v1)\n"); return -EINVAL; } + if (rest_bytes < h1->bLength) { + dev_err(&dev->dev, "invalid buffer length (v1)\n"); + return -EINVAL; + } + if (h1->bLength < sizeof(*h1) + h1->bInCollection) { dev_err(&dev->dev, "invalid UAC_HEADER (v1)\n"); return -EINVAL; diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c index ab3c280a23d1..58d624938a9f 100644 --- a/sound/usb/line6/driver.c +++ b/sound/usb/line6/driver.c @@ -775,9 +775,10 @@ int line6_probe(struct usb_interface *interface, return 0; error: - if (line6->disconnect) - line6->disconnect(line6); - snd_card_free(card); + /* we can call disconnect callback here because no close-sync is + * needed yet at this point + */ + line6_disconnect(interface); return ret; } EXPORT_SYMBOL_GPL(line6_probe); diff --git a/sound/usb/line6/podhd.c b/sound/usb/line6/podhd.c index 49cd4a65e390..5ab9e0c89211 100644 --- a/sound/usb/line6/podhd.c +++ b/sound/usb/line6/podhd.c @@ -307,6 +307,9 @@ static int podhd_init(struct usb_line6 *line6, line6->disconnect = podhd_disconnect; + init_timer(&pod->startup_timer); + INIT_WORK(&pod->startup_work, podhd_startup_workqueue); + if (pod->line6.properties->capabilities & LINE6_CAP_CONTROL) { /* create sysfs entries: */ err = snd_card_add_dev_attr(line6->card, &podhd_dev_attr_group); @@ -330,8 +333,6 @@ static int podhd_init(struct usb_line6 *line6, } /* init device and delay registering */ - init_timer(&pod->startup_timer); - INIT_WORK(&pod->startup_work, podhd_startup_workqueue); podhd_startup(pod); return 0; } diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index d09c28c1deaf..d82e3c81c258 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -2228,6 +2228,9 @@ static int parse_audio_unit(struct mixer_build *state, int unitid) static void snd_usb_mixer_free(struct usb_mixer_interface *mixer) { + /* kill pending URBs */ + snd_usb_mixer_disconnect(mixer); + kfree(mixer->id_elems); if (mixer->urb) { kfree(mixer->urb->transfer_buffer); @@ -2578,8 +2581,13 @@ _error: void snd_usb_mixer_disconnect(struct usb_mixer_interface *mixer) { - usb_kill_urb(mixer->urb); - usb_kill_urb(mixer->rc_urb); + if (mixer->disconnected) + return; + if (mixer->urb) + usb_kill_urb(mixer->urb); + if (mixer->rc_urb) + usb_kill_urb(mixer->rc_urb); + mixer->disconnected = true; } #ifdef CONFIG_PM diff --git a/sound/usb/mixer.h b/sound/usb/mixer.h index 2b4b067646ab..545d99b09706 100644 --- a/sound/usb/mixer.h +++ b/sound/usb/mixer.h @@ -22,6 +22,8 @@ struct usb_mixer_interface { struct urb *rc_urb; struct usb_ctrlrequest *rc_setup_packet; u8 rc_buffer[6]; + + bool disconnected; }; #define MAX_CHANNELS 16 /* max logical channels */ diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 286efc3a6116..7613b9e07b5c 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1352,6 +1352,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, case USB_ID(0x20b1, 0x2008): /* Matrix Audio X-Sabre */ case USB_ID(0x20b1, 0x300a): /* Matrix Audio Mini-i Pro */ case USB_ID(0x22d9, 0x0416): /* OPPO HA-1 */ + case USB_ID(0x2772, 0x0230): /* Pro-Ject Pre Box S2 Digital */ if (fp->altsetting == 2) return SNDRV_PCM_FMTBIT_DSD_U32_BE; break; diff --git a/sound/usb/usx2y/usb_stream.c b/sound/usb/usx2y/usb_stream.c index bf618e1500ac..e7b934f4d837 100644 --- a/sound/usb/usx2y/usb_stream.c +++ b/sound/usb/usx2y/usb_stream.c @@ -191,7 +191,8 @@ struct usb_stream *usb_stream_new(struct usb_stream_kernel *sk, } pg = get_order(read_size); - sk->s = (void *) __get_free_pages(GFP_KERNEL|__GFP_COMP|__GFP_ZERO, pg); + sk->s = (void *) __get_free_pages(GFP_KERNEL|__GFP_COMP|__GFP_ZERO| + __GFP_NOWARN, pg); if (!sk->s) { snd_printk(KERN_WARNING "couldn't __get_free_pages()\n"); goto out; @@ -211,7 +212,8 @@ struct usb_stream *usb_stream_new(struct usb_stream_kernel *sk, pg = get_order(write_size); sk->write_page = - (void *)__get_free_pages(GFP_KERNEL|__GFP_COMP|__GFP_ZERO, pg); + (void *)__get_free_pages(GFP_KERNEL|__GFP_COMP|__GFP_ZERO| + __GFP_NOWARN, pg); if (!sk->write_page) { snd_printk(KERN_WARNING "couldn't __get_free_pages()\n"); usb_stream_free(sk); diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt index 55a60d331f47..05536d8cf77d 100644 --- a/tools/objtool/Documentation/stack-validation.txt +++ b/tools/objtool/Documentation/stack-validation.txt @@ -321,11 +321,10 @@ Errors in .c files 2. If you're getting any other objtool error in a compiled .c file, it may be because the file uses an asm() statement which has a "call" instruction. An asm() statement with a call instruction must declare - the use of the stack pointer in its output operand. For example, on - x86_64: + the use of the stack pointer in its output operand. On x86_64, this + means adding the ASM_CALL_CONSTRAINT as an output constraint: - register void *__sp asm("rsp"); - asm volatile("call func" : "+r" (__sp)); + asm volatile("call func" : ASM_CALL_CONSTRAINT); Otherwise the stack frame may not get created before the call. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 3e199b508a96..9664b1ff4285 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2003,8 +2003,10 @@ int snapshot_gfx_mhz(void) if (fp == NULL) fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r"); - else + else { rewind(fp); + fflush(fp); + } retval = fscanf(fp, "%d", &gfx_cur_mhz); if (retval != 1) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 03f1fa495d74..cbb0564c0ec4 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -6,10 +6,18 @@ */ #include -#include -#define __have_siginfo_t 1 -#define __have_sigval_t 1 -#define __have_sigevent_t 1 + +/* + * glibc 2.26 and later have SIGSYS in siginfo_t. Before that, + * we need to use the kernel's siginfo.h file and trick glibc + * into accepting it. + */ +#if !__GLIBC_PREREQ(2, 26) +# include +# define __have_siginfo_t 1 +# define __have_sigval_t 1 +# define __have_sigevent_t 1 +#endif #include #include @@ -676,7 +684,7 @@ TEST_F_SIGNAL(TRAP, ign, SIGSYS) syscall(__NR_getpid); } -static struct siginfo TRAP_info; +static siginfo_t TRAP_info; static volatile int TRAP_nr; static void TRAP_action(int nr, siginfo_t *info, void *void_context) {