From a8af2054e98548e8515f5d7d5409bd50dcd77e82 Mon Sep 17 00:00:00 2001 From: Purna Chandra Mandal Date: Thu, 2 Jun 2016 14:51:42 +0530 Subject: [PATCH 001/154] MIPS: pic32mzda: Fix linker error for pic32_get_pbclk() commit a726f1d2dd4fee179aa4513176d688ad309de6cc upstream. Early clock API pic32_get_pbclk() is defined in early_clk.c and used by time.c and early_console.c. When CONFIG_EARLY_PRINTK isn't set, early_clk.c isn't compiled and time.c fails to link. Fix it by compiling early_clk.c always. Also sort files in alphabetical order. Fixes: 6e4ad1b41360 ("MIPS: pic32mzda: fix getting timer clock rate.") Reported-by: Harvey Hunt Signed-off-by: Purna Chandra Mandal Reviewed-by: Harvey Hunt Cc: Ralf Baechle Cc: Joshua Henderson Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/13383/ Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/pic32/pic32mzda/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/mips/pic32/pic32mzda/Makefile b/arch/mips/pic32/pic32mzda/Makefile index 4a4c2728c027..c28649615c6c 100644 --- a/arch/mips/pic32/pic32mzda/Makefile +++ b/arch/mips/pic32/pic32mzda/Makefile @@ -2,8 +2,7 @@ # Joshua Henderson, # Copyright (C) 2015 Microchip Technology, Inc. All rights reserved. # -obj-y := init.o time.o config.o +obj-y := config.o early_clk.o init.o time.o obj-$(CONFIG_EARLY_PRINTK) += early_console.o \ - early_pin.o \ - early_clk.o + early_pin.o From ef674c5eb8da4ecb028a2a670b0bb1eec9eed004 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 26 Jan 2017 02:16:47 +0100 Subject: [PATCH 002/154] MIPS: Fix special case in 64 bit IP checksumming. commit 66fd848cadaa6be974a8c780fbeb328f0af4d3bd upstream. For certain arguments such as saddr = 0xc0a8fd60, daddr = 0xc0a8fda1, len = 80, proto = 17, sum = 0x7eae049d there will be a carry when folding the intermediate 64 bit checksum to 32 bit but the code doesn't add the carry back to the one's complement sum, thus an incorrect result will be generated. Reported-by: Mark Zhang Signed-off-by: Ralf Baechle Reviewed-by: James Hogan Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/checksum.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/include/asm/checksum.h b/arch/mips/include/asm/checksum.h index bce1ce53149a..0e231970653a 100644 --- a/arch/mips/include/asm/checksum.h +++ b/arch/mips/include/asm/checksum.h @@ -186,7 +186,9 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, " daddu %0, %4 \n" " dsll32 $1, %0, 0 \n" " daddu %0, $1 \n" + " sltu $1, %0, $1 \n" " dsra32 %0, %0, 0 \n" + " addu %0, $1 \n" #endif " .set pop" : "=r" (sum) From ef9e73be031aa4998617a6e11581904687132e5e Mon Sep 17 00:00:00 2001 From: Mirko Parthey Date: Wed, 15 Feb 2017 23:31:30 +0100 Subject: [PATCH 003/154] MIPS: BCM47XX: Fix button inversion for Asus WL-500W MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bdfdaf1a016ef09cb941f2edad485a713510b8d5 upstream. The Asus WL-500W buttons are active high, but the software treats them as active low. Fix the inverted logic. Fixes: 3be972556fa1 ("MIPS: BCM47XX: Import buttons database from OpenWrt") Signed-off-by: Mirko Parthey Acked-by: Rafał Miłecki Cc: Hauke Mehrtens Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/15295/ Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/bcm47xx/buttons.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/mips/bcm47xx/buttons.c b/arch/mips/bcm47xx/buttons.c index 52caa75bfe4e..e2f50d690624 100644 --- a/arch/mips/bcm47xx/buttons.c +++ b/arch/mips/bcm47xx/buttons.c @@ -17,6 +17,12 @@ .active_low = 1, \ } +#define BCM47XX_GPIO_KEY_H(_gpio, _code) \ + { \ + .code = _code, \ + .gpio = _gpio, \ + } + /* Asus */ static const struct gpio_keys_button @@ -79,8 +85,8 @@ bcm47xx_buttons_asus_wl500gpv2[] __initconst = { static const struct gpio_keys_button bcm47xx_buttons_asus_wl500w[] __initconst = { - BCM47XX_GPIO_KEY(6, KEY_RESTART), - BCM47XX_GPIO_KEY(7, KEY_WPS_BUTTON), + BCM47XX_GPIO_KEY_H(6, KEY_RESTART), + BCM47XX_GPIO_KEY_H(7, KEY_WPS_BUTTON), }; static const struct gpio_keys_button From 6f35f1fc14b6188483f993ca989de4b0c85a5b1a Mon Sep 17 00:00:00 2001 From: James Cowgill Date: Mon, 9 Jan 2017 16:52:28 +0000 Subject: [PATCH 004/154] MIPS: OCTEON: Fix copy_from_user fault handling for large buffers commit 884b426917e4b3c85f33b382c792a94305dfdd62 upstream. If copy_from_user is called with a large buffer (>= 128 bytes) and the userspace buffer refers partially to unreadable memory, then it is possible for Octeon's copy_from_user to report the wrong number of bytes have been copied. In the case where the buffer size is an exact multiple of 128 and the fault occurs in the last 64 bytes, copy_from_user will report that all the bytes were copied successfully but leave some garbage in the destination buffer. The bug is in the main __copy_user_common loop in octeon-memcpy.S where in the middle of the loop, src and dst are incremented by 128 bytes. The l_exc_copy fault handler is used after this but that assumes that "src < THREAD_BUADDR($28)". This is not the case if src has already been incremented. Fix by adding an extra fault handler which rewinds the src and dst pointers 128 bytes before falling though to l_exc_copy. Thanks to the pwritev test from the strace test suite for originally highlighting this bug! Fixes: 5b3b16880f40 ("MIPS: Add Cavium OCTEON processor support ...") Signed-off-by: James Cowgill Acked-by: David Daney Reviewed-by: James Hogan Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14978/ Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/cavium-octeon/octeon-memcpy.S | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/mips/cavium-octeon/octeon-memcpy.S b/arch/mips/cavium-octeon/octeon-memcpy.S index 64e08df51d65..8b7004132491 100644 --- a/arch/mips/cavium-octeon/octeon-memcpy.S +++ b/arch/mips/cavium-octeon/octeon-memcpy.S @@ -208,18 +208,18 @@ EXC( STORE t2, UNIT(6)(dst), s_exc_p10u) ADD src, src, 16*NBYTES EXC( STORE t3, UNIT(7)(dst), s_exc_p9u) ADD dst, dst, 16*NBYTES -EXC( LOAD t0, UNIT(-8)(src), l_exc_copy) -EXC( LOAD t1, UNIT(-7)(src), l_exc_copy) -EXC( LOAD t2, UNIT(-6)(src), l_exc_copy) -EXC( LOAD t3, UNIT(-5)(src), l_exc_copy) +EXC( LOAD t0, UNIT(-8)(src), l_exc_copy_rewind16) +EXC( LOAD t1, UNIT(-7)(src), l_exc_copy_rewind16) +EXC( LOAD t2, UNIT(-6)(src), l_exc_copy_rewind16) +EXC( LOAD t3, UNIT(-5)(src), l_exc_copy_rewind16) EXC( STORE t0, UNIT(-8)(dst), s_exc_p8u) EXC( STORE t1, UNIT(-7)(dst), s_exc_p7u) EXC( STORE t2, UNIT(-6)(dst), s_exc_p6u) EXC( STORE t3, UNIT(-5)(dst), s_exc_p5u) -EXC( LOAD t0, UNIT(-4)(src), l_exc_copy) -EXC( LOAD t1, UNIT(-3)(src), l_exc_copy) -EXC( LOAD t2, UNIT(-2)(src), l_exc_copy) -EXC( LOAD t3, UNIT(-1)(src), l_exc_copy) +EXC( LOAD t0, UNIT(-4)(src), l_exc_copy_rewind16) +EXC( LOAD t1, UNIT(-3)(src), l_exc_copy_rewind16) +EXC( LOAD t2, UNIT(-2)(src), l_exc_copy_rewind16) +EXC( LOAD t3, UNIT(-1)(src), l_exc_copy_rewind16) EXC( STORE t0, UNIT(-4)(dst), s_exc_p4u) EXC( STORE t1, UNIT(-3)(dst), s_exc_p3u) EXC( STORE t2, UNIT(-2)(dst), s_exc_p2u) @@ -383,6 +383,10 @@ done: nop END(memcpy) +l_exc_copy_rewind16: + /* Rewind src and dst by 16*NBYTES for l_exc_copy */ + SUB src, src, 16*NBYTES + SUB dst, dst, 16*NBYTES l_exc_copy: /* * Copy bytes from src until faulting load address (or until a From 3660e62cfce7a1db86fe330dadb92db59d0bc039 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 19 Jan 2017 14:20:09 +0100 Subject: [PATCH 005/154] MIPS: Lantiq: Keep ethernet enabled during boot commit 774f0c6419bb8f9d83901d33582c7fe3ba6a6cb3 upstream. Disabling ethernet during reboot (only to enable it again when the ethernet driver attaches) can put the chip into a faulty state where it corrupts the header of all incoming packets. This happens if packets arrive during the time window where the core is disabled, and it can be easily reproduced by rebooting while sending a flood ping to the broadcast address. Fixes: 95135bfa7ead ("MIPS: Lantiq: Deactivate most of the devices by default") Signed-off-by: Felix Fietkau Acked-by: John Crispin Cc: hauke.mehrtens@lantiq.com Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/15078/ Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/lantiq/xway/sysctrl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index 236193b5210b..9a61671c00a7 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -545,7 +545,7 @@ void __init ltq_soc_init(void) clkdev_add_pmu("1a800000.pcie", "msi", 1, 1, PMU1_PCIE2_MSI); clkdev_add_pmu("1a800000.pcie", "pdi", 1, 1, PMU1_PCIE2_PDI); clkdev_add_pmu("1a800000.pcie", "ctl", 1, 1, PMU1_PCIE2_CTL); - clkdev_add_pmu("1e108000.eth", NULL, 1, 0, PMU_SWITCH | PMU_PPE_DP); + clkdev_add_pmu("1e108000.eth", NULL, 0, 0, PMU_SWITCH | PMU_PPE_DP); clkdev_add_pmu("1da00000.usif", "NULL", 1, 0, PMU_USIF); clkdev_add_pmu("1e103100.deu", NULL, 1, 0, PMU_DEU); } else if (of_machine_is_compatible("lantiq,ar10")) { @@ -553,7 +553,7 @@ void __init ltq_soc_init(void) ltq_ar10_fpi_hz(), ltq_ar10_pp32_hz()); clkdev_add_pmu("1e101000.usb", "ctl", 1, 0, PMU_USB0); clkdev_add_pmu("1e106000.usb", "ctl", 1, 0, PMU_USB1); - clkdev_add_pmu("1e108000.eth", NULL, 1, 0, PMU_SWITCH | + clkdev_add_pmu("1e108000.eth", NULL, 0, 0, PMU_SWITCH | PMU_PPE_DP | PMU_PPE_TC); clkdev_add_pmu("1da00000.usif", "NULL", 1, 0, PMU_USIF); clkdev_add_pmu("1f203000.rcu", "gphy", 1, 0, PMU_GPHY); @@ -575,11 +575,11 @@ void __init ltq_soc_init(void) clkdev_add_pmu(NULL, "ahb", 1, 0, PMU_AHBM | PMU_AHBS); clkdev_add_pmu("1da00000.usif", "NULL", 1, 0, PMU_USIF); - clkdev_add_pmu("1e108000.eth", NULL, 1, 0, + clkdev_add_pmu("1e108000.eth", NULL, 0, 0, PMU_SWITCH | PMU_PPE_DPLUS | PMU_PPE_DPLUM | PMU_PPE_EMA | PMU_PPE_TC | PMU_PPE_SLL01 | PMU_PPE_QSB | PMU_PPE_TOP); - clkdev_add_pmu("1f203000.rcu", "gphy", 1, 0, PMU_GPHY); + clkdev_add_pmu("1f203000.rcu", "gphy", 0, 0, PMU_GPHY); clkdev_add_pmu("1e103000.sdio", NULL, 1, 0, PMU_SDIO); clkdev_add_pmu("1e103100.deu", NULL, 1, 0, PMU_DEU); clkdev_add_pmu("1e116000.mei", "dfe", 1, 0, PMU_DFE); From d0eae5bbd1c3361659c3c0e349c14123f0aaba81 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 7 Nov 2016 15:07:02 +0000 Subject: [PATCH 006/154] MIPS: Clear ISA bit correctly in get_frame_info() commit ccaf7caf2c73c6db920772bf08bf1d47b2170634 upstream. get_frame_info() can be called in microMIPS kernels with the ISA bit already clear. For example this happens when unwind_stack_by_address() is called because we begin with a PC that has the ISA bit set & subtract the (odd) offset from the preceding symbol (which does not have the ISA bit set). Since get_frame_info() unconditionally subtracts 1 from the PC in microMIPS kernels it incorrectly misaligns the address it then attempts to access code at, leading to an address error exception. Fix this by using msk_isa16_mode() to clear the ISA bit, which allows get_frame_info() to function regardless of whether it is provided with a PC that has the ISA bit set or not. Signed-off-by: Paul Burton Fixes: 34c2f668d0f6 ("MIPS: microMIPS: Add unaligned access support.") Cc: Leonid Yegoshin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14528/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/process.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 9514e5f2209f..c0aa99b2e314 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -303,17 +303,14 @@ static inline int is_sp_move_ins(union mips_instruction *ip) static int get_frame_info(struct mips_frame_info *info) { -#ifdef CONFIG_CPU_MICROMIPS - union mips_instruction *ip = (void *) (((char *) info->func) - 1); -#else - union mips_instruction *ip = info->func; -#endif + union mips_instruction *ip; unsigned max_insns = info->func_size / sizeof(union mips_instruction); unsigned i; info->pc_offset = -1; info->frame_size = 0; + ip = (void *)msk_isa16_mode((ulong)info->func); if (!ip) goto err; From ce449cbdcff78a383741bc79d66f2779a556735b Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 7 Nov 2016 15:07:03 +0000 Subject: [PATCH 007/154] MIPS: Prevent unaligned accesses during stack unwinding commit a3552dace7d1d0cabf573e88fc3025cb90c4a601 upstream. During stack unwinding we call a number of functions to determine what type of instruction we're looking at. The union mips_instruction pointer provided to them may be pointing at a 2 byte, but not 4 byte, aligned address & we thus cannot directly access the 4 byte wide members of the union mips_instruction. To avoid this is_ra_save_ins() copies the required half-words of the microMIPS instruction to a correctly aligned union mips_instruction on the stack, which it can then access safely. The is_jump_ins() & is_sp_move_ins() functions do not correctly perform this temporary copy, and instead attempt to directly dereference 4 byte fields which may be misaligned and lead to an address exception. Fix this by copying the instruction halfwords to a temporary union mips_instruction in get_frame_info() such that we can provide a 4 byte aligned union mips_instruction to the is_*_ins() functions and they do not need to deal with misalignment themselves. Signed-off-by: Paul Burton Fixes: 34c2f668d0f6 ("MIPS: microMIPS: Add unaligned access support.") Cc: Leonid Yegoshin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14529/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/process.c | 70 +++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index c0aa99b2e314..941a88191f0e 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -198,8 +198,6 @@ struct mips_frame_info { static inline int is_ra_save_ins(union mips_instruction *ip) { #ifdef CONFIG_CPU_MICROMIPS - union mips_instruction mmi; - /* * swsp ra,offset * swm16 reglist,offset(sp) @@ -209,23 +207,20 @@ static inline int is_ra_save_ins(union mips_instruction *ip) * * microMIPS is way more fun... */ - if (mm_insn_16bit(ip->halfword[0])) { - mmi.word = (ip->halfword[0] << 16); - return (mmi.mm16_r5_format.opcode == mm_swsp16_op && - mmi.mm16_r5_format.rt == 31) || - (mmi.mm16_m_format.opcode == mm_pool16c_op && - mmi.mm16_m_format.func == mm_swm16_op); + if (mm_insn_16bit(ip->halfword[1])) { + return (ip->mm16_r5_format.opcode == mm_swsp16_op && + ip->mm16_r5_format.rt == 31) || + (ip->mm16_m_format.opcode == mm_pool16c_op && + ip->mm16_m_format.func == mm_swm16_op); } else { - mmi.halfword[0] = ip->halfword[1]; - mmi.halfword[1] = ip->halfword[0]; - return (mmi.mm_m_format.opcode == mm_pool32b_op && - mmi.mm_m_format.rd > 9 && - mmi.mm_m_format.base == 29 && - mmi.mm_m_format.func == mm_swm32_func) || - (mmi.i_format.opcode == mm_sw32_op && - mmi.i_format.rs == 29 && - mmi.i_format.rt == 31); + return (ip->mm_m_format.opcode == mm_pool32b_op && + ip->mm_m_format.rd > 9 && + ip->mm_m_format.base == 29 && + ip->mm_m_format.func == mm_swm32_func) || + (ip->i_format.opcode == mm_sw32_op && + ip->i_format.rs == 29 && + ip->i_format.rt == 31); } #else /* sw / sd $ra, offset($sp) */ @@ -246,12 +241,8 @@ static inline int is_jump_ins(union mips_instruction *ip) * * microMIPS is kind of more fun... */ - union mips_instruction mmi; - - mmi.word = (ip->halfword[0] << 16); - - if ((mmi.mm16_r5_format.opcode == mm_pool16c_op && - (mmi.mm16_r5_format.rt & mm_jr16_op) == mm_jr16_op) || + if ((ip->mm16_r5_format.opcode == mm_pool16c_op && + (ip->mm16_r5_format.rt & mm_jr16_op) == mm_jr16_op) || ip->j_format.opcode == mm_jal32_op) return 1; if (ip->r_format.opcode != mm_pool32a_op || @@ -280,15 +271,13 @@ static inline int is_sp_move_ins(union mips_instruction *ip) * * microMIPS is not more fun... */ - if (mm_insn_16bit(ip->halfword[0])) { - union mips_instruction mmi; - - mmi.word = (ip->halfword[0] << 16); - return (mmi.mm16_r3_format.opcode == mm_pool16d_op && - mmi.mm16_r3_format.simmediate && mm_addiusp_func) || - (mmi.mm16_r5_format.opcode == mm_pool16d_op && - mmi.mm16_r5_format.rt == 29); + if (mm_insn_16bit(ip->halfword[1])) { + return (ip->mm16_r3_format.opcode == mm_pool16d_op && + ip->mm16_r3_format.simmediate && mm_addiusp_func) || + (ip->mm16_r5_format.opcode == mm_pool16d_op && + ip->mm16_r5_format.rt == 29); } + return ip->mm_i_format.opcode == mm_addiu32_op && ip->mm_i_format.rt == 29 && ip->mm_i_format.rs == 29; #else @@ -303,7 +292,8 @@ static inline int is_sp_move_ins(union mips_instruction *ip) static int get_frame_info(struct mips_frame_info *info) { - union mips_instruction *ip; + bool is_mmips = IS_ENABLED(CONFIG_CPU_MICROMIPS); + union mips_instruction insn, *ip; unsigned max_insns = info->func_size / sizeof(union mips_instruction); unsigned i; @@ -319,11 +309,21 @@ static int get_frame_info(struct mips_frame_info *info) max_insns = min(128U, max_insns); for (i = 0; i < max_insns; i++, ip++) { + if (is_mmips && mm_insn_16bit(ip->halfword[0])) { + insn.halfword[0] = 0; + insn.halfword[1] = ip->halfword[0]; + } else if (is_mmips) { + insn.halfword[0] = ip->halfword[1]; + insn.halfword[1] = ip->halfword[0]; + } else { + insn.word = ip->word; + } - if (is_jump_ins(ip)) + if (is_jump_ins(&insn)) break; + if (!info->frame_size) { - if (is_sp_move_ins(ip)) + if (is_sp_move_ins(&insn)) { #ifdef CONFIG_CPU_MICROMIPS if (mm_insn_16bit(ip->halfword[0])) @@ -346,7 +346,7 @@ static int get_frame_info(struct mips_frame_info *info) } continue; } - if (info->pc_offset == -1 && is_ra_save_ins(ip)) { + if (info->pc_offset == -1 && is_ra_save_ins(&insn)) { info->pc_offset = ip->i_format.simmediate / sizeof(long); break; From b0b4eb58c5efe31c5dbd8fc771b43bb13bf84430 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 7 Nov 2016 15:07:04 +0000 Subject: [PATCH 008/154] MIPS: Fix get_frame_info() handling of microMIPS function size commit b6c7a324df37bf05ef7a2c1580683cf10d082d97 upstream. get_frame_info() is meant to iterate over up to the first 128 instructions within a function, but for microMIPS kernels it will not reach that many instructions unless the function is 512 bytes long since we calculate the maximum number of instructions to check by dividing the function length by the 4 byte size of a union mips_instruction. In microMIPS kernels this won't do since instructions are variable length. Fix this by instead checking whether the pointer to the current instruction has reached the end of the function, and use max_insns as a simple constant to check the number of iterations against. Signed-off-by: Paul Burton Fixes: 34c2f668d0f6 ("MIPS: microMIPS: Add unaligned access support.") Cc: Leonid Yegoshin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14530/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/process.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 941a88191f0e..d56ef4ba0a84 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -293,9 +293,9 @@ static inline int is_sp_move_ins(union mips_instruction *ip) static int get_frame_info(struct mips_frame_info *info) { bool is_mmips = IS_ENABLED(CONFIG_CPU_MICROMIPS); - union mips_instruction insn, *ip; - unsigned max_insns = info->func_size / sizeof(union mips_instruction); - unsigned i; + union mips_instruction insn, *ip, *ip_end; + const unsigned int max_insns = 128; + unsigned int i; info->pc_offset = -1; info->frame_size = 0; @@ -304,11 +304,9 @@ static int get_frame_info(struct mips_frame_info *info) if (!ip) goto err; - if (max_insns == 0) - max_insns = 128U; /* unknown function size */ - max_insns = min(128U, max_insns); + ip_end = (void *)ip + info->func_size; - for (i = 0; i < max_insns; i++, ip++) { + for (i = 0; i < max_insns && ip < ip_end; i++, ip++) { if (is_mmips && mm_insn_16bit(ip->halfword[0])) { insn.halfword[0] = 0; insn.halfword[1] = ip->halfword[0]; From b14e085086245a829a87f6cc44c996333c641390 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 7 Nov 2016 15:07:05 +0000 Subject: [PATCH 009/154] MIPS: Fix is_jump_ins() handling of 16b microMIPS instructions commit 67c75057709a6d85c681c78b9b2f9b71191f01a2 upstream. is_jump_ins() checks 16b instruction fields without verifying that the instruction is indeed 16b, as is done by is_ra_save_ins() & is_sp_move_ins(). Add the appropriate check. Signed-off-by: Paul Burton Fixes: 34c2f668d0f6 ("MIPS: microMIPS: Add unaligned access support.") Cc: Leonid Yegoshin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14531/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/process.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index d56ef4ba0a84..298dc33737b0 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -241,9 +241,14 @@ static inline int is_jump_ins(union mips_instruction *ip) * * microMIPS is kind of more fun... */ - if ((ip->mm16_r5_format.opcode == mm_pool16c_op && - (ip->mm16_r5_format.rt & mm_jr16_op) == mm_jr16_op) || - ip->j_format.opcode == mm_jal32_op) + if (mm_insn_16bit(ip->halfword[1])) { + if ((ip->mm16_r5_format.opcode == mm_pool16c_op && + (ip->mm16_r5_format.rt & mm_jr16_op) == mm_jr16_op)) + return 1; + return 0; + } + + if (ip->j_format.opcode == mm_jal32_op) return 1; if (ip->r_format.opcode != mm_pool32a_op || ip->r_format.func != mm_pool32axf_op) From 209ad1941daafab4255990c3b885cf184a05d72e Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 7 Nov 2016 15:07:06 +0000 Subject: [PATCH 010/154] MIPS: Calculate microMIPS ra properly when unwinding the stack commit bb9bc4689b9c635714fbcd5d335bad9934a7ebfc upstream. get_frame_info() calculates the offset of the return address within a stack frame simply by dividing a the bottom 16 bits of the instruction, treated as a signed integer, by the size of a long. Whilst this works for MIPS32 & MIPS64 ISAs where the sw or sd instructions are used, it's incorrect for microMIPS where encodings differ. The result is that we typically completely fail to unwind the stack on microMIPS. Fix this by adjusting is_ra_save_ins() to calculate the return address offset, and take into account the various different encodings there in the same place as we consider whether an instruction is storing the ra/$31 register. With this we are now able to unwind the stack for kernels targetting the microMIPS ISA, for example we can produce: Call Trace: [<80109e1f>] show_stack+0x63/0x7c [<8011ea17>] __warn+0x9b/0xac [<8011ea45>] warn_slowpath_fmt+0x1d/0x20 [<8013fe53>] register_console+0x43/0x314 [<8067c58d>] of_setup_earlycon+0x1dd/0x1ec [<8067f63f>] early_init_dt_scan_chosen_stdout+0xe7/0xf8 [<8066c115>] do_early_param+0x75/0xac [<801302f9>] parse_args+0x1dd/0x308 [<8066c459>] parse_early_options+0x25/0x28 [<8066c48b>] parse_early_param+0x2f/0x38 [<8066e8cf>] setup_arch+0x113/0x488 [<8066c4f3>] start_kernel+0x57/0x328 ---[ end trace 0000000000000000 ]--- Whereas previously we only produced: Call Trace: [<80109e1f>] show_stack+0x63/0x7c ---[ end trace 0000000000000000 ]--- Signed-off-by: Paul Burton Fixes: 34c2f668d0f6 ("MIPS: microMIPS: Add unaligned access support.") Cc: Leonid Yegoshin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14532/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/process.c | 83 +++++++++++++++++++++++++++++--------- 1 file changed, 63 insertions(+), 20 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 298dc33737b0..39c946fce939 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -195,7 +195,7 @@ struct mips_frame_info { #define J_TARGET(pc,target) \ (((unsigned long)(pc) & 0xf0000000) | ((target) << 2)) -static inline int is_ra_save_ins(union mips_instruction *ip) +static inline int is_ra_save_ins(union mips_instruction *ip, int *poff) { #ifdef CONFIG_CPU_MICROMIPS /* @@ -208,25 +208,70 @@ static inline int is_ra_save_ins(union mips_instruction *ip) * microMIPS is way more fun... */ if (mm_insn_16bit(ip->halfword[1])) { - return (ip->mm16_r5_format.opcode == mm_swsp16_op && - ip->mm16_r5_format.rt == 31) || - (ip->mm16_m_format.opcode == mm_pool16c_op && - ip->mm16_m_format.func == mm_swm16_op); + switch (ip->mm16_r5_format.opcode) { + case mm_swsp16_op: + if (ip->mm16_r5_format.rt != 31) + return 0; + + *poff = ip->mm16_r5_format.simmediate; + *poff = (*poff << 2) / sizeof(ulong); + return 1; + + case mm_pool16c_op: + switch (ip->mm16_m_format.func) { + case mm_swm16_op: + *poff = ip->mm16_m_format.imm; + *poff += 1 + ip->mm16_m_format.rlist; + *poff = (*poff << 2) / sizeof(ulong); + return 1; + + default: + return 0; + } + + default: + return 0; + } } - else { - return (ip->mm_m_format.opcode == mm_pool32b_op && - ip->mm_m_format.rd > 9 && - ip->mm_m_format.base == 29 && - ip->mm_m_format.func == mm_swm32_func) || - (ip->i_format.opcode == mm_sw32_op && - ip->i_format.rs == 29 && - ip->i_format.rt == 31); + + switch (ip->i_format.opcode) { + case mm_sw32_op: + if (ip->i_format.rs != 29) + return 0; + if (ip->i_format.rt != 31) + return 0; + + *poff = ip->i_format.simmediate / sizeof(ulong); + return 1; + + case mm_pool32b_op: + switch (ip->mm_m_format.func) { + case mm_swm32_func: + if (ip->mm_m_format.rd < 0x10) + return 0; + if (ip->mm_m_format.base != 29) + return 0; + + *poff = ip->mm_m_format.simmediate; + *poff += (ip->mm_m_format.rd & 0xf) * sizeof(u32); + *poff /= sizeof(ulong); + return 1; + default: + return 0; + } + + default: + return 0; } #else /* sw / sd $ra, offset($sp) */ - return (ip->i_format.opcode == sw_op || ip->i_format.opcode == sd_op) && - ip->i_format.rs == 29 && - ip->i_format.rt == 31; + if ((ip->i_format.opcode == sw_op || ip->i_format.opcode == sd_op) && + ip->i_format.rs == 29 && ip->i_format.rt == 31) { + *poff = ip->i_format.simmediate / sizeof(ulong); + return 1; + } + + return 0; #endif } @@ -349,11 +394,9 @@ static int get_frame_info(struct mips_frame_info *info) } continue; } - if (info->pc_offset == -1 && is_ra_save_ins(&insn)) { - info->pc_offset = - ip->i_format.simmediate / sizeof(long); + if (info->pc_offset == -1 && + is_ra_save_ins(&insn, &info->pc_offset)) break; - } } if (info->frame_size && info->pc_offset >= 0) /* nested */ return 0; From 8d06cbd365e1fccb5b84bb8146d9bf30c2a55f4a Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 7 Nov 2016 15:07:07 +0000 Subject: [PATCH 011/154] MIPS: Handle microMIPS jumps in the same way as MIPS32/MIPS64 jumps commit 096a0de427ea333f56f0ee00328cff2a2731bcf1 upstream. is_jump_ins() checks for plain jump ("j") instructions since commit e7438c4b893e ("MIPS: Fix sibling call handling in get_frame_info") but that commit didn't make the same change to the microMIPS code, leaving it inconsistent with the MIPS32/MIPS64 code. Handle the microMIPS encoding of the jump instruction too such that it behaves consistently. Signed-off-by: Paul Burton Fixes: e7438c4b893e ("MIPS: Fix sibling call handling in get_frame_info") Cc: Tony Wu Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14533/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/process.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 39c946fce939..1652f36acad1 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -293,6 +293,8 @@ static inline int is_jump_ins(union mips_instruction *ip) return 0; } + if (ip->j_format.opcode == mm_j32_op) + return 1; if (ip->j_format.opcode == mm_jal32_op) return 1; if (ip->r_format.opcode != mm_pool32a_op || From 80bbadbc42f912b43fd6664e834ba16c9dd28889 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 18 Jan 2017 17:46:18 +0800 Subject: [PATCH 012/154] mmc: sdhci-acpi: support deferred probe commit e28d6f048799acb0014491e6b74e580d84bd7916 upstream. With commit 67bf5156edc4 ("gpio / ACPI: fix returned error from acpi_dev_gpio_irq_get()"), mmc_gpiod_request_cd() returns -EPROBE_DEFER if GPIO is not ready when sdhci-acpi driver is probed, and sdhci-acpi driver should be probed again later in this case. This fixes an order issue when both GPIO and sdhci-acpi drivers are built as modules. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=177101 Tested-by: Jonas Aaberg Signed-off-by: Zhang Rui Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-acpi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index fddd0be196f4..80918abfc468 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -466,7 +466,10 @@ static int sdhci_acpi_probe(struct platform_device *pdev) if (sdhci_acpi_flag(c, SDHCI_ACPI_SD_CD)) { bool v = sdhci_acpi_flag(c, SDHCI_ACPI_SD_CD_OVERRIDE_LEVEL); - if (mmc_gpiod_request_cd(host->mmc, NULL, 0, v, 0, NULL)) { + err = mmc_gpiod_request_cd(host->mmc, NULL, 0, v, 0, NULL); + if (err) { + if (err == -EPROBE_DEFER) + goto err_free; dev_warn(dev, "failed to setup card detect gpio\n"); c->use_runtime_pm = false; } From d6b88a09cc2291aa3fac1e6eefd479398c1d8793 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Tue, 27 Dec 2016 16:02:36 -0200 Subject: [PATCH 013/154] am437x-vpfe: always assign bpp variable commit 6ebf75774f823ddbdbd10921006989d4df222f4a upstream. In vpfe_s_fmt(), when the sensor format and the requested format were the same, bpp was assigned to vpfe->bpp without being initialized first. Grab the bpp value that is currently used by using __vpfe_get_format() instead of its wrapper, vpfe_try_fmt(). This use of uninitialized variable has been found by compiling the kernel with clang. Fixes: 417d2e507edc ("[media] media: platform: add VPFE capture driver support for AM437X") Signed-off-by: Nicolas Iooss Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/am437x/am437x-vpfe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/am437x/am437x-vpfe.c b/drivers/media/platform/am437x/am437x-vpfe.c index b33b9e35e60e..05489a401c5c 100644 --- a/drivers/media/platform/am437x/am437x-vpfe.c +++ b/drivers/media/platform/am437x/am437x-vpfe.c @@ -1576,7 +1576,7 @@ static int vpfe_s_fmt(struct file *file, void *priv, return -EBUSY; } - ret = vpfe_try_fmt(file, priv, &format); + ret = __vpfe_get_format(vpfe, &format, &bpp); if (ret) return ret; From 02789ccd59ec10fe407cf5803e4fb081a5c14fa7 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Mon, 12 Dec 2016 09:16:51 -0200 Subject: [PATCH 014/154] uvcvideo: Fix a wrong macro commit 17c341ec0115837a610b2da15e32546e26068234 upstream. Don't mix up UVC_BUF_STATE_* and VB2_BUF_STATE_* codes. Fixes: 6998b6fb4b1c ("[media] uvcvideo: Use videobuf2-vmalloc") Signed-off-by: Guennadi Liakhovetski Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/uvc/uvc_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/uvc/uvc_queue.c b/drivers/media/usb/uvc/uvc_queue.c index 77edd206d345..40e5a6b54955 100644 --- a/drivers/media/usb/uvc/uvc_queue.c +++ b/drivers/media/usb/uvc/uvc_queue.c @@ -412,7 +412,7 @@ struct uvc_buffer *uvc_queue_next_buffer(struct uvc_video_queue *queue, nextbuf = NULL; spin_unlock_irqrestore(&queue->irqlock, flags); - buf->state = buf->error ? VB2_BUF_STATE_ERROR : UVC_BUF_STATE_DONE; + buf->state = buf->error ? UVC_BUF_STATE_ERROR : UVC_BUF_STATE_DONE; vb2_set_plane_payload(&buf->buf.vb2_buf, 0, buf->bytesused); vb2_buffer_done(&buf->buf.vb2_buf, VB2_BUF_STATE_DONE); From 7e5b7798d0c8021d5d9b7a9e7bd404c6bcbbd011 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 7 Jan 2017 23:08:49 -0200 Subject: [PATCH 015/154] media: fix dm1105.c build error commit e3bb3cddd177550d63a3e4909cf1a7782f13414d upstream. Fix dm1105 build error when CONFIG_I2C_ALGOBIT=m and CONFIG_DVB_DM1105=y. drivers/built-in.o: In function `dm1105_probe': dm1105.c:(.text+0x2836e7): undefined reference to `i2c_bit_add_bus' Signed-off-by: Randy Dunlap Reported-by: kbuild test robot Cc: Javier Martinez Canillas Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/pci/dm1105/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/pci/dm1105/Kconfig b/drivers/media/pci/dm1105/Kconfig index 173daf0c0847..14fa7e40f2a6 100644 --- a/drivers/media/pci/dm1105/Kconfig +++ b/drivers/media/pci/dm1105/Kconfig @@ -1,6 +1,6 @@ config DVB_DM1105 tristate "SDMC DM1105 based PCI cards" - depends on DVB_CORE && PCI && I2C + depends on DVB_CORE && PCI && I2C && I2C_ALGOBIT select DVB_PLL if MEDIA_SUBDRV_AUTOSELECT select DVB_STV0299 if MEDIA_SUBDRV_AUTOSELECT select DVB_STV0288 if MEDIA_SUBDRV_AUTOSELECT From 238442c2b535ef39be70f54904a8882dfd0349ca Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Mon, 16 Jan 2017 19:27:41 -0200 Subject: [PATCH 016/154] cxd2820r: fix gpio null pointer dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0ffb94b6cc5df6376ab6bff5b80075641f6716f8 upstream. Setting GPIOs during probe causes null pointer deference when GPIOLIB was not selected by Kconfig. Initialize driver private field before calling set gpios. It is regressing bug since 4.9. Fixes: 07fdf7d9f19f ("[media] cxd2820r: add I2C driver bindings") Reported-by: Chris Rankin Tested-by: Chris Rankin Tested-by: Håkan Lennestål Signed-off-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-frontends/cxd2820r_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/cxd2820r_core.c b/drivers/media/dvb-frontends/cxd2820r_core.c index 95267c6edb3a..f6ebbb47b9b2 100644 --- a/drivers/media/dvb-frontends/cxd2820r_core.c +++ b/drivers/media/dvb-frontends/cxd2820r_core.c @@ -615,6 +615,7 @@ static int cxd2820r_probe(struct i2c_client *client, } priv->client[0] = client; + priv->fe.demodulator_priv = priv; priv->i2c = client->adapter; priv->ts_mode = pdata->ts_mode; priv->ts_clk_inv = pdata->ts_clk_inv; @@ -697,7 +698,6 @@ static int cxd2820r_probe(struct i2c_client *client, memcpy(&priv->fe.ops, &cxd2820r_ops, sizeof(priv->fe.ops)); if (!pdata->attach_in_use) priv->fe.ops.release = NULL; - priv->fe.demodulator_priv = priv; i2c_set_clientdata(client, priv); /* Setup callbacks */ From ce1e60b492144b1d5ae0f67b781459325de0e884 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Fri, 2 Dec 2016 15:16:08 -0200 Subject: [PATCH 017/154] lirc_dev: LIRC_{G,S}ET_REC_MODE do not work commit bd291208d7f5d6b2d6a033fee449a429230b06df upstream. Since "273b902 [media] lirc_dev: use LIRC_CAN_REC() define" these ioctls no longer work. Signed-off-by: Sean Young Reviewed-by: Andi Shyti Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/rc/lirc_dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index 91f9bb87ce68..6ebe89551961 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -589,7 +589,7 @@ long lirc_dev_fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg) result = put_user(ir->d.features, (__u32 __user *)arg); break; case LIRC_GET_REC_MODE: - if (LIRC_CAN_REC(ir->d.features)) { + if (!LIRC_CAN_REC(ir->d.features)) { result = -ENOTTY; break; } @@ -599,7 +599,7 @@ long lirc_dev_fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg) (__u32 __user *)arg); break; case LIRC_SET_REC_MODE: - if (LIRC_CAN_REC(ir->d.features)) { + if (!LIRC_CAN_REC(ir->d.features)) { result = -ENOTTY; break; } From 719f1765b02c6a65eff5db3765c87444a1156cb9 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 2 Jan 2017 08:32:47 -0200 Subject: [PATCH 018/154] media: Properly pass through media entity types in entity enumeration commit 98d85f3cb912fde14593ead54dea4c1a00b3966f upstream. When the functions replaced media entity types, the range which was allowed for the types was incorrect. This meant that media entity types for specific devices were not passed correctly to the userspace through MEDIA_IOC_ENUM_ENTITIES. Fix it. Fixes: commit b2cd27448b33 ("[media] media-device: map new functions into old types for legacy API") Reported-and-tested-by: Antti Laakso Signed-off-by: Sakari Ailus Acked-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/media-device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/media-device.c b/drivers/media/media-device.c index 2783531f9fc0..4462d8c69d57 100644 --- a/drivers/media/media-device.c +++ b/drivers/media/media-device.c @@ -130,7 +130,7 @@ static long media_device_enum_entities(struct media_device *mdev, * old range. */ if (ent->function < MEDIA_ENT_F_OLD_BASE || - ent->function > MEDIA_ENT_T_DEVNODE_UNKNOWN) { + ent->function > MEDIA_ENT_F_TUNER) { if (is_media_entity_v4l2_subdev(ent)) entd->type = MEDIA_ENT_F_V4L2_SUBDEV_UNKNOWN; else if (ent->function != MEDIA_ENT_F_IO_V4L) From da1e40237f8f3516581b534c484c236a79ccfd14 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 11 Jan 2017 21:50:46 -0500 Subject: [PATCH 019/154] ext4: fix deadlock between inline_data and ext4_expand_extra_isize_ea() commit c755e251357a0cee0679081f08c3f4ba797a8009 upstream. The xattr_sem deadlock problems fixed in commit 2e81a4eeedca: "ext4: avoid deadlock when expanding inode size" didn't include the use of xattr_sem in fs/ext4/inline.c. With the addition of project quota which added a new extra inode field, this exposed deadlocks in the inline_data code similar to the ones fixed by 2e81a4eeedca. The deadlock can be reproduced via: dmesg -n 7 mke2fs -t ext4 -O inline_data -Fq -I 256 /dev/vdc 32768 mount -t ext4 -o debug_want_extra_isize=24 /dev/vdc /vdc mkdir /vdc/a umount /vdc mount -t ext4 /dev/vdc /vdc echo foo > /vdc/a/foo and looks like this: [ 11.158815] [ 11.160276] ============================================= [ 11.161960] [ INFO: possible recursive locking detected ] [ 11.161960] 4.10.0-rc3-00015-g011b30a8a3cf #160 Tainted: G W [ 11.161960] --------------------------------------------- [ 11.161960] bash/2519 is trying to acquire lock: [ 11.161960] (&ei->xattr_sem){++++..}, at: [] ext4_expand_extra_isize_ea+0x3d/0x4cd [ 11.161960] [ 11.161960] but task is already holding lock: [ 11.161960] (&ei->xattr_sem){++++..}, at: [] ext4_try_add_inline_entry+0x3a/0x152 [ 11.161960] [ 11.161960] other info that might help us debug this: [ 11.161960] Possible unsafe locking scenario: [ 11.161960] [ 11.161960] CPU0 [ 11.161960] ---- [ 11.161960] lock(&ei->xattr_sem); [ 11.161960] lock(&ei->xattr_sem); [ 11.161960] [ 11.161960] *** DEADLOCK *** [ 11.161960] [ 11.161960] May be due to missing lock nesting notation [ 11.161960] [ 11.161960] 4 locks held by bash/2519: [ 11.161960] #0: (sb_writers#3){.+.+.+}, at: [] mnt_want_write+0x1e/0x3e [ 11.161960] #1: (&type->i_mutex_dir_key){++++++}, at: [] path_openat+0x338/0x67a [ 11.161960] #2: (jbd2_handle){++++..}, at: [] start_this_handle+0x582/0x622 [ 11.161960] #3: (&ei->xattr_sem){++++..}, at: [] ext4_try_add_inline_entry+0x3a/0x152 [ 11.161960] [ 11.161960] stack backtrace: [ 11.161960] CPU: 0 PID: 2519 Comm: bash Tainted: G W 4.10.0-rc3-00015-g011b30a8a3cf #160 [ 11.161960] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-1 04/01/2014 [ 11.161960] Call Trace: [ 11.161960] dump_stack+0x72/0xa3 [ 11.161960] __lock_acquire+0xb7c/0xcb9 [ 11.161960] ? kvm_clock_read+0x1f/0x29 [ 11.161960] ? __lock_is_held+0x36/0x66 [ 11.161960] ? __lock_is_held+0x36/0x66 [ 11.161960] lock_acquire+0x106/0x18a [ 11.161960] ? ext4_expand_extra_isize_ea+0x3d/0x4cd [ 11.161960] down_write+0x39/0x72 [ 11.161960] ? ext4_expand_extra_isize_ea+0x3d/0x4cd [ 11.161960] ext4_expand_extra_isize_ea+0x3d/0x4cd [ 11.161960] ? _raw_read_unlock+0x22/0x2c [ 11.161960] ? jbd2_journal_extend+0x1e2/0x262 [ 11.161960] ? __ext4_journal_get_write_access+0x3d/0x60 [ 11.161960] ext4_mark_inode_dirty+0x17d/0x26d [ 11.161960] ? ext4_add_dirent_to_inline.isra.12+0xa5/0xb2 [ 11.161960] ext4_add_dirent_to_inline.isra.12+0xa5/0xb2 [ 11.161960] ext4_try_add_inline_entry+0x69/0x152 [ 11.161960] ext4_add_entry+0xa3/0x848 [ 11.161960] ? __brelse+0x14/0x2f [ 11.161960] ? _raw_spin_unlock_irqrestore+0x44/0x4f [ 11.161960] ext4_add_nondir+0x17/0x5b [ 11.161960] ext4_create+0xcf/0x133 [ 11.161960] ? ext4_mknod+0x12f/0x12f [ 11.161960] lookup_open+0x39e/0x3fb [ 11.161960] ? __wake_up+0x1a/0x40 [ 11.161960] ? lock_acquire+0x11e/0x18a [ 11.161960] path_openat+0x35c/0x67a [ 11.161960] ? sched_clock_cpu+0xd7/0xf2 [ 11.161960] do_filp_open+0x36/0x7c [ 11.161960] ? _raw_spin_unlock+0x22/0x2c [ 11.161960] ? __alloc_fd+0x169/0x173 [ 11.161960] do_sys_open+0x59/0xcc [ 11.161960] SyS_open+0x1d/0x1f [ 11.161960] do_int80_syscall_32+0x4f/0x61 [ 11.161960] entry_INT80_32+0x2f/0x2f [ 11.161960] EIP: 0xb76ad469 [ 11.161960] EFLAGS: 00000286 CPU: 0 [ 11.161960] EAX: ffffffda EBX: 08168ac8 ECX: 00008241 EDX: 000001b6 [ 11.161960] ESI: b75e46bc EDI: b7755000 EBP: bfbdb108 ESP: bfbdafc0 [ 11.161960] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b Reported-by: George Spelvin Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inline.c | 66 ++++++++++++++++++++++-------------------------- fs/ext4/xattr.c | 30 +++++++++------------- fs/ext4/xattr.h | 32 +++++++++++++++++++++++ 3 files changed, 74 insertions(+), 54 deletions(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index d8ca4b9f9dd6..028329721bbe 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -376,7 +376,7 @@ out: static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, unsigned int len) { - int ret, size; + int ret, size, no_expand; struct ext4_inode_info *ei = EXT4_I(inode); if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) @@ -386,15 +386,14 @@ static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, if (size < len) return -ENOSPC; - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); if (ei->i_inline_off) ret = ext4_update_inline_data(handle, inode, len); else ret = ext4_create_inline_data(handle, inode, len); - up_write(&EXT4_I(inode)->xattr_sem); - + ext4_write_unlock_xattr(inode, &no_expand); return ret; } @@ -523,7 +522,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping, struct inode *inode, unsigned flags) { - int ret, needed_blocks; + int ret, needed_blocks, no_expand; handle_t *handle = NULL; int retries = 0, sem_held = 0; struct page *page = NULL; @@ -563,7 +562,7 @@ retry: goto out; } - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); sem_held = 1; /* If some one has already done this for us, just exit. */ if (!ext4_has_inline_data(inode)) { @@ -600,7 +599,7 @@ retry: put_page(page); page = NULL; ext4_orphan_add(handle, inode); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); sem_held = 0; ext4_journal_stop(handle); handle = NULL; @@ -626,7 +625,7 @@ out: put_page(page); } if (sem_held) - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); if (handle) ext4_journal_stop(handle); brelse(iloc.bh); @@ -719,7 +718,7 @@ convert: int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied, struct page *page) { - int ret; + int ret, no_expand; void *kaddr; struct ext4_iloc iloc; @@ -737,7 +736,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, goto out; } - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); BUG_ON(!ext4_has_inline_data(inode)); kaddr = kmap_atomic(page); @@ -747,7 +746,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, /* clear page dirty so that writepages wouldn't work for us. */ ClearPageDirty(page); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); brelse(iloc.bh); out: return copied; @@ -758,7 +757,7 @@ ext4_journalled_write_inline_data(struct inode *inode, unsigned len, struct page *page) { - int ret; + int ret, no_expand; void *kaddr; struct ext4_iloc iloc; @@ -768,11 +767,11 @@ ext4_journalled_write_inline_data(struct inode *inode, return NULL; } - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); kaddr = kmap_atomic(page); ext4_write_inline_data(inode, &iloc, kaddr, 0, len); kunmap_atomic(kaddr); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); return iloc.bh; } @@ -1249,7 +1248,7 @@ out: int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, struct inode *dir, struct inode *inode) { - int ret, inline_size; + int ret, inline_size, no_expand; void *inline_start; struct ext4_iloc iloc; @@ -1257,7 +1256,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, if (ret) return ret; - down_write(&EXT4_I(dir)->xattr_sem); + ext4_write_lock_xattr(dir, &no_expand); if (!ext4_has_inline_data(dir)) goto out; @@ -1303,7 +1302,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, out: ext4_mark_inode_dirty(handle, dir); - up_write(&EXT4_I(dir)->xattr_sem); + ext4_write_unlock_xattr(dir, &no_expand); brelse(iloc.bh); return ret; } @@ -1663,7 +1662,7 @@ int ext4_delete_inline_entry(handle_t *handle, struct buffer_head *bh, int *has_inline_data) { - int err, inline_size; + int err, inline_size, no_expand; struct ext4_iloc iloc; void *inline_start; @@ -1671,7 +1670,7 @@ int ext4_delete_inline_entry(handle_t *handle, if (err) return err; - down_write(&EXT4_I(dir)->xattr_sem); + ext4_write_lock_xattr(dir, &no_expand); if (!ext4_has_inline_data(dir)) { *has_inline_data = 0; goto out; @@ -1705,7 +1704,7 @@ int ext4_delete_inline_entry(handle_t *handle, ext4_show_inline_dir(dir, iloc.bh, inline_start, inline_size); out: - up_write(&EXT4_I(dir)->xattr_sem); + ext4_write_unlock_xattr(dir, &no_expand); brelse(iloc.bh); if (err != -ENOENT) ext4_std_error(dir->i_sb, err); @@ -1804,11 +1803,11 @@ out: int ext4_destroy_inline_data(handle_t *handle, struct inode *inode) { - int ret; + int ret, no_expand; - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); ret = ext4_destroy_inline_data_nolock(handle, inode); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); return ret; } @@ -1893,7 +1892,7 @@ out: void ext4_inline_data_truncate(struct inode *inode, int *has_inline) { handle_t *handle; - int inline_size, value_len, needed_blocks; + int inline_size, value_len, needed_blocks, no_expand; size_t i_size; void *value = NULL; struct ext4_xattr_ibody_find is = { @@ -1910,7 +1909,7 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline) if (IS_ERR(handle)) return; - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); if (!ext4_has_inline_data(inode)) { *has_inline = 0; ext4_journal_stop(handle); @@ -1968,7 +1967,7 @@ out_error: up_write(&EXT4_I(inode)->i_data_sem); out: brelse(is.iloc.bh); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); kfree(value); if (inode->i_nlink) ext4_orphan_del(handle, inode); @@ -1984,7 +1983,7 @@ out: int ext4_convert_inline_data(struct inode *inode) { - int error, needed_blocks; + int error, needed_blocks, no_expand; handle_t *handle; struct ext4_iloc iloc; @@ -2006,15 +2005,10 @@ int ext4_convert_inline_data(struct inode *inode) goto out_free; } - down_write(&EXT4_I(inode)->xattr_sem); - if (!ext4_has_inline_data(inode)) { - up_write(&EXT4_I(inode)->xattr_sem); - goto out; - } - - error = ext4_convert_inline_data_nolock(handle, inode, &iloc); - up_write(&EXT4_I(inode)->xattr_sem); -out: + ext4_write_lock_xattr(inode, &no_expand); + if (ext4_has_inline_data(inode)) + error = ext4_convert_inline_data_nolock(handle, inode, &iloc); + ext4_write_unlock_xattr(inode, &no_expand); ext4_journal_stop(handle); out_free: brelse(iloc.bh); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index d77be9e9f535..4448ed37181b 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1174,16 +1174,14 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, struct ext4_xattr_block_find bs = { .s = { .not_found = -ENODATA, }, }; - unsigned long no_expand; + int no_expand; int error; if (!name) return -EINVAL; if (strlen(name) > 255) return -ERANGE; - down_write(&EXT4_I(inode)->xattr_sem); - no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); - ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); + ext4_write_lock_xattr(inode, &no_expand); error = ext4_reserve_inode_write(handle, inode, &is.iloc); if (error) @@ -1251,7 +1249,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, ext4_xattr_update_super_block(handle, inode->i_sb); inode->i_ctime = ext4_current_time(inode); if (!value) - ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); + no_expand = 0; error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); /* * The bh is consumed by ext4_mark_iloc_dirty, even with @@ -1265,9 +1263,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, cleanup: brelse(is.iloc.bh); brelse(bs.bh); - if (no_expand == 0) - ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); return error; } @@ -1484,12 +1480,11 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, int error = 0, tried_min_extra_isize = 0; int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); int isize_diff; /* How much do we need to grow i_extra_isize */ + int no_expand; + + if (ext4_write_trylock_xattr(inode, &no_expand) == 0) + return 0; - down_write(&EXT4_I(inode)->xattr_sem); - /* - * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty - */ - ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); retry: isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize; if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) @@ -1571,17 +1566,16 @@ shift: EXT4_I(inode)->i_extra_isize = new_extra_isize; brelse(bh); out: - ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); return 0; cleanup: brelse(bh); /* - * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode - * size expansion failed. + * Inode size expansion failed; don't try again */ - up_write(&EXT4_I(inode)->xattr_sem); + no_expand = 1; + ext4_write_unlock_xattr(inode, &no_expand); return error; } diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index a92e783fa057..099c8b670ef5 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -102,6 +102,38 @@ extern const struct xattr_handler ext4_xattr_security_handler; #define EXT4_XATTR_NAME_ENCRYPTION_CONTEXT "c" +/* + * The EXT4_STATE_NO_EXPAND is overloaded and used for two purposes. + * The first is to signal that there the inline xattrs and data are + * taking up so much space that we might as well not keep trying to + * expand it. The second is that xattr_sem is taken for writing, so + * we shouldn't try to recurse into the inode expansion. For this + * second case, we need to make sure that we take save and restore the + * NO_EXPAND state flag appropriately. + */ +static inline void ext4_write_lock_xattr(struct inode *inode, int *save) +{ + down_write(&EXT4_I(inode)->xattr_sem); + *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); + ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); +} + +static inline int ext4_write_trylock_xattr(struct inode *inode, int *save) +{ + if (down_write_trylock(&EXT4_I(inode)->xattr_sem) == 0) + return 0; + *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); + ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); + return 1; +} + +static inline void ext4_write_unlock_xattr(struct inode *inode, int *save) +{ + if (*save == 0) + ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); + up_write(&EXT4_I(inode)->xattr_sem); +} + extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); From 404950add4ce0bfb95346a8233f5daba7fad8a54 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Fri, 10 Feb 2017 11:20:19 +0900 Subject: [PATCH 020/154] spi: s3c64xx: fix inconsistency between binding and driver commit 379f831a927817c130a62e3ca0082ae685557324 upstream. Commit a92e7c3d82a1 ("spi: s3c64xx: consider the case when the CS line is not connected") introduced an inconsistency between the binding, where the disconnected CS line was marked as 'no-cs-readback', and the driver. The driver is erroneously checking for that attribute with property name of 'broken-cs'. Check for 'no-cs-readback' in the driver as well. Fixes: a92e7c3d82a1 ("spi: s3c64xx: consider the case when the CS line is not connected") Signed-off-by: Andi Shyti Reviewed-by: Krzysztof Kozlowski Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-s3c64xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c index 3c09e94cf827..186342b74141 100644 --- a/drivers/spi/spi-s3c64xx.c +++ b/drivers/spi/spi-s3c64xx.c @@ -1003,7 +1003,7 @@ static struct s3c64xx_spi_info *s3c64xx_spi_parse_dt(struct device *dev) sci->num_cs = temp; } - sci->no_cs = of_property_read_bool(dev->of_node, "broken-cs"); + sci->no_cs = of_property_read_bool(dev->of_node, "no-cs-readback"); return sci; } From 37e70c4de51bf80123292f175dfce6b19af9c5d8 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 25 Oct 2016 11:37:58 +0200 Subject: [PATCH 021/154] ARM: at91: define LPDDR types commit e3f0a4017c2143b4b813df6a93e8cf79e3f76936 upstream. The Atmel MPDDR controller support LPDDR2 and LPDDR3 memories, add their types. Signed-off-by: Alexandre Belloni Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- include/soc/at91/at91sam9_ddrsdr.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/soc/at91/at91sam9_ddrsdr.h b/include/soc/at91/at91sam9_ddrsdr.h index dc10c52e0e91..393362bdb860 100644 --- a/include/soc/at91/at91sam9_ddrsdr.h +++ b/include/soc/at91/at91sam9_ddrsdr.h @@ -81,6 +81,7 @@ #define AT91_DDRSDRC_LPCB_POWER_DOWN 2 #define AT91_DDRSDRC_LPCB_DEEP_POWER_DOWN 3 #define AT91_DDRSDRC_CLKFR (1 << 2) /* Clock Frozen */ +#define AT91_DDRSDRC_LPDDR2_PWOFF (1 << 3) /* LPDDR Power Off */ #define AT91_DDRSDRC_PASR (7 << 4) /* Partial Array Self Refresh */ #define AT91_DDRSDRC_TCSR (3 << 8) /* Temperature Compensated Self Refresh */ #define AT91_DDRSDRC_DS (3 << 10) /* Drive Strength */ @@ -96,7 +97,9 @@ #define AT91_DDRSDRC_MD_SDR 0 #define AT91_DDRSDRC_MD_LOW_POWER_SDR 1 #define AT91_DDRSDRC_MD_LOW_POWER_DDR 3 +#define AT91_DDRSDRC_MD_LPDDR3 5 #define AT91_DDRSDRC_MD_DDR2 6 /* [SAM9 Only] */ +#define AT91_DDRSDRC_MD_LPDDR2 7 #define AT91_DDRSDRC_DBW (1 << 4) /* Data Bus Width */ #define AT91_DDRSDRC_DBW_32BITS (0 << 4) #define AT91_DDRSDRC_DBW_16BITS (1 << 4) From f9a1949f8ff308d4320a44aa995c7f16ab40cf7f Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 19 Jan 2017 01:46:58 +0100 Subject: [PATCH 022/154] ARM: dts: at91: Enable DMA on sama5d4_xplained console commit ef8d02d4a2c36f7a93e74c95a9c419353b310117 upstream. Enable DMA on usart3 to get a more reliable console. This is especially useful for automation and kernelci were a kernel with PROVE_LOCKING enabled is quite susceptible to character loss, resulting in tests failure. Acked-by: Nicolas Ferre Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/at91-sama5d4_xplained.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/at91-sama5d4_xplained.dts b/arch/arm/boot/dts/at91-sama5d4_xplained.dts index ed7fce297738..44d1171c7fc0 100644 --- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts +++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts @@ -110,6 +110,8 @@ }; usart3: serial@fc00c000 { + atmel,use-dma-rx; + atmel,use-dma-tx; status = "okay"; }; From 9971863fdd623da5df091159cc889ea101b68202 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 19 Jan 2017 23:05:39 +0100 Subject: [PATCH 023/154] ARM: dts: at91: Enable DMA on sama5d2_xplained console commit 78162d48466d23c45a784034630c5928af631e3d upstream. Enable DMA on uart1 to get a more reliable console. Acked-by: Nicolas Ferre Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/at91-sama5d2_xplained.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/at91-sama5d2_xplained.dts b/arch/arm/boot/dts/at91-sama5d2_xplained.dts index 0b9a59d5fdac..30fac04289a5 100644 --- a/arch/arm/boot/dts/at91-sama5d2_xplained.dts +++ b/arch/arm/boot/dts/at91-sama5d2_xplained.dts @@ -148,6 +148,8 @@ uart1: serial@f8020000 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_uart1_default>; + atmel,use-dma-rx; + atmel,use-dma-tx; status = "okay"; }; From ebc3e95502aa9feebe1fd021b6d3c44dd27ecb48 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 16 Feb 2017 15:26:54 +0800 Subject: [PATCH 024/154] ALSA: hda/realtek - Cannot adjust speaker's volume on a Dell AIO commit 9f1bc2c4c58fcb2d86e0e26437dc8f3a18ac3276 upstream. The issue is the same as "dd9aa335c880 ALSA: hda/realtek - Can't adjust speaker's volume on a Dell AIO", the output requires to connect to a node with Amp-out capability. Applying the same fixup "ALC298_FIXUP_SPK_VOLUME" can fix the issue. Signed-off-by: Kai-Heng Feng Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 758ac86a1d3a..d4402b02b1bc 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6065,6 +6065,12 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { SND_HDA_PIN_QUIRK(0x10ec0298, 0x1028, "Dell", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, ALC298_STANDARD_PINS, {0x17, 0x90170150}), + SND_HDA_PIN_QUIRK(0x10ec0298, 0x1028, "Dell", ALC298_FIXUP_SPK_VOLUME, + {0x12, 0xb7a60140}, + {0x13, 0xb7a60150}, + {0x17, 0x90170110}, + {0x1a, 0x03011020}, + {0x21, 0x03211030}), {} }; From 51ce9867c2efd422519edb92e6d4ea5276b3f283 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Wed, 15 Feb 2017 17:09:42 +0100 Subject: [PATCH 025/154] ALSA: hda - fix Lewisburg audio issue commit e7480b34ad1ab84a63540b2c884cb92c0764ab74 upstream. Like for Sunrise Point, the total stream number of Lewisburg's input and output stream exceeds 15 (GCAP is 0x9701), which will cause some streams do not work because of the overflow on SDxCTL.STRM field if using the legacy stream tag allocation method. Fixes: 5cf92c8b3dc5 ("ALSA: hda - Add Intel Lewisburg device IDs Audio") Signed-off-by: Jaroslav Kysela Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index c64d986009a9..bc4462694aaf 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2197,9 +2197,9 @@ static const struct pci_device_id azx_ids[] = { .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, /* Lewisburg */ { PCI_DEVICE(0x8086, 0xa1f0), - .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, { PCI_DEVICE(0x8086, 0xa270), - .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, /* Lynx Point-LP */ { PCI_DEVICE(0x8086, 0x9c20), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, From 5ac9276dd15f6aeb55e8eb0ecf6e15759ae1f501 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 28 Feb 2017 14:49:07 +0100 Subject: [PATCH 026/154] ALSA: timer: Reject user params with too small ticks commit 71321eb3f2d0df4e6c327e0b936eec4458a12054 upstream. When a user sets a too small ticks with a fine-grained timer like hrtimer, the kernel tries to fire up the timer irq too frequently. This may lead to the condensed locks, eventually the kernel spinlock lockup with warnings. For avoiding such a situation, we define a lower limit of the resolution, namely 1ms. When the user passes a too small tick value that results in less than that, the kernel returns -EINVAL now. Reported-by: Dmitry Vyukov Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/timer.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index fc144f43faa6..ad153149b231 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1702,9 +1702,21 @@ static int snd_timer_user_params(struct file *file, return -EBADFD; if (copy_from_user(¶ms, _params, sizeof(params))) return -EFAULT; - if (!(t->hw.flags & SNDRV_TIMER_HW_SLAVE) && params.ticks < 1) { - err = -EINVAL; - goto _end; + if (!(t->hw.flags & SNDRV_TIMER_HW_SLAVE)) { + u64 resolution; + + if (params.ticks < 1) { + err = -EINVAL; + goto _end; + } + + /* Don't allow resolution less than 1ms */ + resolution = snd_timer_resolution(tu->timeri); + resolution *= params.ticks; + if (resolution < 1000000) { + err = -EINVAL; + goto _end; + } } if (params.queue_size > 0 && (params.queue_size < 32 || params.queue_size > 1024)) { From 074f6db61f962e302c83092ac2ee5692f9351454 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 28 Feb 2017 17:16:48 +0100 Subject: [PATCH 027/154] ALSA: ctxfi: Fallback DMA mask to 32bit commit 15c75b09f8d190f89ab4db463b87d411ca349dfe upstream. Currently ctxfi driver tries to set only the 64bit DMA mask on 64bit architectures, and bails out if it fails. This causes a problem on some platforms since the 64bit DMA isn't always guaranteed. We should fall back to the default 32bit DMA when 64bit DMA fails. Fixes: 6d74b86d3c0f ("ALSA: ctxfi - Allow 64bit DMA") Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/ctxfi/cthw20k1.c | 19 ++++++------------- sound/pci/ctxfi/cthw20k2.c | 19 ++++++------------- 2 files changed, 12 insertions(+), 26 deletions(-) diff --git a/sound/pci/ctxfi/cthw20k1.c b/sound/pci/ctxfi/cthw20k1.c index 9667cbfb0ca2..ab4cdab5cfa5 100644 --- a/sound/pci/ctxfi/cthw20k1.c +++ b/sound/pci/ctxfi/cthw20k1.c @@ -27,12 +27,6 @@ #include "cthw20k1.h" #include "ct20k1reg.h" -#if BITS_PER_LONG == 32 -#define CT_XFI_DMA_MASK DMA_BIT_MASK(32) /* 32 bit PTE */ -#else -#define CT_XFI_DMA_MASK DMA_BIT_MASK(64) /* 64 bit PTE */ -#endif - struct hw20k1 { struct hw hw; spinlock_t reg_20k1_lock; @@ -1904,19 +1898,18 @@ static int hw_card_start(struct hw *hw) { int err; struct pci_dev *pci = hw->pci; + const unsigned int dma_bits = BITS_PER_LONG; err = pci_enable_device(pci); if (err < 0) return err; /* Set DMA transfer mask */ - if (dma_set_mask(&pci->dev, CT_XFI_DMA_MASK) < 0 || - dma_set_coherent_mask(&pci->dev, CT_XFI_DMA_MASK) < 0) { - dev_err(hw->card->dev, - "architecture does not support PCI busmaster DMA with mask 0x%llx\n", - CT_XFI_DMA_MASK); - err = -ENXIO; - goto error1; + if (dma_set_mask(&pci->dev, DMA_BIT_MASK(dma_bits))) { + dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(dma_bits)); + } else { + dma_set_mask(&pci->dev, DMA_BIT_MASK(32)); + dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(32)); } if (!hw->io_base) { diff --git a/sound/pci/ctxfi/cthw20k2.c b/sound/pci/ctxfi/cthw20k2.c index 6414ecf93efa..18ee7768b7c4 100644 --- a/sound/pci/ctxfi/cthw20k2.c +++ b/sound/pci/ctxfi/cthw20k2.c @@ -26,12 +26,6 @@ #include "cthw20k2.h" #include "ct20k2reg.h" -#if BITS_PER_LONG == 32 -#define CT_XFI_DMA_MASK DMA_BIT_MASK(32) /* 32 bit PTE */ -#else -#define CT_XFI_DMA_MASK DMA_BIT_MASK(64) /* 64 bit PTE */ -#endif - struct hw20k2 { struct hw hw; /* for i2c */ @@ -2029,19 +2023,18 @@ static int hw_card_start(struct hw *hw) int err = 0; struct pci_dev *pci = hw->pci; unsigned int gctl; + const unsigned int dma_bits = BITS_PER_LONG; err = pci_enable_device(pci); if (err < 0) return err; /* Set DMA transfer mask */ - if (dma_set_mask(&pci->dev, CT_XFI_DMA_MASK) < 0 || - dma_set_coherent_mask(&pci->dev, CT_XFI_DMA_MASK) < 0) { - dev_err(hw->card->dev, - "architecture does not support PCI busmaster DMA with mask 0x%llx\n", - CT_XFI_DMA_MASK); - err = -ENXIO; - goto error1; + if (!dma_set_mask(&pci->dev, DMA_BIT_MASK(dma_bits))) { + dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(dma_bits)); + } else { + dma_set_mask(&pci->dev, DMA_BIT_MASK(32)); + dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(32)); } if (!hw->io_base) { From 09cd5d3479b930f5cde8ed4d66784e724ffcf08c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 28 Feb 2017 22:15:51 +0100 Subject: [PATCH 028/154] ALSA: seq: Fix link corruption by event error handling commit f3ac9f737603da80c2da3e84b89e74429836bb6d upstream. The sequencer FIFO management has a bug that may lead to a corruption (shortage) of the cell linked list. When a sequencer client faces an error at the event delivery, it tries to put back the dequeued cell. When the first queue was put back, this forgot the tail pointer tracking, and the link will be screwed up. Although there is no memory corruption, the sequencer client may stall forever at exit while flushing the pending FIFO cells in snd_seq_pool_done(), as spotted by syzkaller. This patch addresses the missing tail pointer tracking at snd_seq_fifo_cell_putback(). Also the patch makes sure to clear the cell->enxt pointer at snd_seq_fifo_event_in() for avoiding a similar mess-up of the FIFO linked list. Reported-by: Dmitry Vyukov Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/seq_fifo.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c index 1d5acbe0c08b..86240d02b530 100644 --- a/sound/core/seq/seq_fifo.c +++ b/sound/core/seq/seq_fifo.c @@ -135,6 +135,7 @@ int snd_seq_fifo_event_in(struct snd_seq_fifo *f, f->tail = cell; if (f->head == NULL) f->head = cell; + cell->next = NULL; f->cells++; spin_unlock_irqrestore(&f->lock, flags); @@ -214,6 +215,8 @@ void snd_seq_fifo_cell_putback(struct snd_seq_fifo *f, spin_lock_irqsave(&f->lock, flags); cell->next = f->head; f->head = cell; + if (!f->tail) + f->tail = cell; f->cells++; spin_unlock_irqrestore(&f->lock, flags); } From 2abe620e01c9a39c3b76dd419f6c0e58fae4b885 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 28 Feb 2017 17:27:57 +0100 Subject: [PATCH 029/154] ALSA: hda - Add subwoofer support for Dell Inspiron 17 7000 Gaming commit 493de342748cc6f52938096f5480cf291da58a0b upstream. Dell Inspiron 17 7000 Gaming laptop needs a similar quirk like Inspiron 7599 to support its subwoofer speaker. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=194191 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d4402b02b1bc..f026fc94b227 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5577,6 +5577,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE), SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME), + SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), From acb06ff2d5d559c7a934fdb7f841cfc19eb4e4bd Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Mon, 27 Feb 2017 10:11:47 +0800 Subject: [PATCH 030/154] ALSA: hda - Fix micmute hotkey problem for a lenovo AIO machine commit 29693efcea0f38cf40d0055d2401490a4f9bf8be upstream. On this machine, the micmute button is connected to Line2 of the codec and the micmute led is connected to GPIO2 of the codec. After applying this quirk, both hotkey and led work well. Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f026fc94b227..0c62b1d8c11b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5693,6 +5693,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC292_FIXUP_TPT460), SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), + SND_PCI_QUIRK(0x17aa, 0x3112, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x3978, "IdeaPad Y410P", ALC269_FIXUP_NO_SHUTUP), From 4401e4779e2e6edea43f80ba1dc8e25e9bec7abd Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 8 Feb 2017 14:07:42 -0800 Subject: [PATCH 031/154] hwmon: (it87) Do not overwrite bit 2..6 of pwm control registers commit 4c7b8ca1ae5ed9e27014732c8a918ba11a86cf09 upstream. In IT8620E, after setting pwm control to manual, it was observed that pwm values for fan 4..6 have reversed results (writing 0 results in fans running at full speed, writing 255 results in fans turned off). With the new PWM control, pwm polarity for pwm control 4..6 is specified in its pwm control registers. Those registers are overwritten when setting the pwm mode or the temperature mapping. Do not touch bit 2..6 of pwm control registers on register writes to fix the problem. Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/it87.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c index ad82cb28d87a..991e0fce781e 100644 --- a/drivers/hwmon/it87.c +++ b/drivers/hwmon/it87.c @@ -1300,25 +1300,35 @@ static ssize_t set_pwm_enable(struct device *dev, struct device_attribute *attr, it87_write_value(data, IT87_REG_FAN_MAIN_CTRL, data->fan_main_ctrl); } else { + u8 ctrl; + /* No on/off mode, set maximum pwm value */ data->pwm_duty[nr] = pwm_to_reg(data, 0xff); it87_write_value(data, IT87_REG_PWM_DUTY[nr], data->pwm_duty[nr]); /* and set manual mode */ - data->pwm_ctrl[nr] = has_newer_autopwm(data) ? - data->pwm_temp_map[nr] : - data->pwm_duty[nr]; - it87_write_value(data, IT87_REG_PWM[nr], - data->pwm_ctrl[nr]); + if (has_newer_autopwm(data)) { + ctrl = (data->pwm_ctrl[nr] & 0x7c) | + data->pwm_temp_map[nr]; + } else { + ctrl = data->pwm_duty[nr]; + } + data->pwm_ctrl[nr] = ctrl; + it87_write_value(data, IT87_REG_PWM[nr], ctrl); } } else { - if (val == 1) /* Manual mode */ - data->pwm_ctrl[nr] = has_newer_autopwm(data) ? - data->pwm_temp_map[nr] : - data->pwm_duty[nr]; - else /* Automatic mode */ - data->pwm_ctrl[nr] = 0x80 | data->pwm_temp_map[nr]; - it87_write_value(data, IT87_REG_PWM[nr], data->pwm_ctrl[nr]); + u8 ctrl; + + if (has_newer_autopwm(data)) { + ctrl = (data->pwm_ctrl[nr] & 0x7c) | + data->pwm_temp_map[nr]; + if (val != 1) + ctrl |= 0x80; + } else { + ctrl = (val == 1 ? data->pwm_duty[nr] : 0x80); + } + data->pwm_ctrl[nr] = ctrl; + it87_write_value(data, IT87_REG_PWM[nr], ctrl); if (data->type != it8603 && nr < 3) { /* set SmartGuardian mode */ @@ -1462,7 +1472,8 @@ static ssize_t set_pwm_temp_map(struct device *dev, * otherwise, just store it for later use. */ if (data->pwm_ctrl[nr] & 0x80) { - data->pwm_ctrl[nr] = 0x80 | data->pwm_temp_map[nr]; + data->pwm_ctrl[nr] = (data->pwm_ctrl[nr] & 0xfc) | + data->pwm_temp_map[nr]; it87_write_value(data, IT87_REG_PWM[nr], data->pwm_ctrl[nr]); } mutex_unlock(&data->update_lock); From 6c95eba9ca104fad6abd758c58b3ac55c44f627d Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 8 Feb 2017 14:02:59 -0800 Subject: [PATCH 032/154] hwmon: (it87) Ensure that pwm control cache is current before updating values commit 82dbe987b70042b340f851bdc969a971081e5f02 upstream. If sensor attributes were never read, the pwm control data has not been initiialized, which can cause wrong driver behavior. Ensure that cached data is current before acting on it. Reported-by: Kevin Folz Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/it87.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c index 991e0fce781e..43146162c122 100644 --- a/drivers/hwmon/it87.c +++ b/drivers/hwmon/it87.c @@ -1354,6 +1354,7 @@ static ssize_t set_pwm(struct device *dev, struct device_attribute *attr, return -EINVAL; mutex_lock(&data->update_lock); + it87_update_pwm_ctrl(data, nr); if (has_newer_autopwm(data)) { /* * If we are in automatic mode, the PWM duty cycle register @@ -1466,6 +1467,7 @@ static ssize_t set_pwm_temp_map(struct device *dev, } mutex_lock(&data->update_lock); + it87_update_pwm_ctrl(data, nr); data->pwm_temp_map[nr] = reg; /* * If we are in automatic mode, write the temp mapping immediately; From 2893a55e39fcfe3b9f5738584db98848b4957d9f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 26 Jan 2017 12:37:32 +0100 Subject: [PATCH 033/154] staging: greybus: loopback: fix broken udelay commit 33b8807a6fe10d0e675e0704444373a6fad93188 upstream. The loopback driver allows the user to set a minimum delay of up to one second to be inserted between test iterations (i.e. request submissions). The delay is currently specified in microseconds and is implemented using udelay. Busy looping for long periods is not just anti-social; udelay must not be used for delays longer than a few milliseconds due to the risk of integer overflow. Replace the broken udelay with a usleep_range with a 100 us range for short delays (< 20 ms) and otherwise revert to using msleep. Fixes: b36f04fa9417 ("greybus: loopback: Convert thread delay to microseconds") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/loopback.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/staging/greybus/loopback.c b/drivers/staging/greybus/loopback.c index 7882306adeca..29dc249b0c74 100644 --- a/drivers/staging/greybus/loopback.c +++ b/drivers/staging/greybus/loopback.c @@ -1051,8 +1051,13 @@ static int gb_loopback_fn(void *data) gb_loopback_calculate_stats(gb, !!error); } gb->send_count++; - if (us_wait) - udelay(us_wait); + + if (us_wait) { + if (us_wait < 20000) + usleep_range(us_wait, us_wait + 100); + else + msleep(us_wait / 1000); + } } gb_pm_runtime_put_autosuspend(bundle); From 148c4526d76e1af4065fac629bff3fcc5019de1c Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Sun, 19 Feb 2017 16:35:59 -0500 Subject: [PATCH 034/154] staging/lustre/lnet: Fix allocation size for sv_cpt_data commit dc7ffefdcc28a45214aa707fdc3df6a5e611ba09 upstream. This is unbreaking another of those "stealth" janitor patches that got in and subtly broke some things. sv_cpt_data is a pointer to pointer, so need to dereference it twice to allocate the correct structure size. Fixes: 9899cb68c6c2 ("Staging: lustre: rpc: Use sizeof type *pointer instead of sizeof type.") CC: Sandhya Bankar Signed-off-by: Oleg Drokin Reviewed-by: James Simmons Reviewed-by: Doug Oucharek Signed-off-by: Greg Kroah-Hartman --- drivers/staging/lustre/lnet/selftest/rpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/lustre/lnet/selftest/rpc.c b/drivers/staging/lustre/lnet/selftest/rpc.c index f5619d8744ef..0256d65dfcd8 100644 --- a/drivers/staging/lustre/lnet/selftest/rpc.c +++ b/drivers/staging/lustre/lnet/selftest/rpc.c @@ -252,7 +252,7 @@ srpc_service_init(struct srpc_service *svc) svc->sv_shuttingdown = 0; svc->sv_cpt_data = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(*svc->sv_cpt_data)); + sizeof(**svc->sv_cpt_data)); if (!svc->sv_cpt_data) return -ENOMEM; From c7472b964d9a1bfabe4953175cdbd5ccf44bc416 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Jan 2017 15:53:08 +0100 Subject: [PATCH 035/154] staging: rtl: fix possible NULL pointer dereference commit 6e017006022abfea5d2466cad936065f45763ad1 upstream. gcc-7 detects that wlanhdr_to_ethhdr() in two drivers calls memcpy() with a destination argument that an earlier function call may have set to NULL: staging/rtl8188eu/core/rtw_recv.c: In function 'wlanhdr_to_ethhdr': staging/rtl8188eu/core/rtw_recv.c:1318:2: warning: argument 1 null where non-null expected [-Wnonnull] staging/rtl8712/rtl871x_recv.c: In function 'r8712_wlanhdr_to_ethhdr': staging/rtl8712/rtl871x_recv.c:649:2: warning: argument 1 null where non-null expected [-Wnonnull] I'm fixing this by adding a NULL pointer check and returning failure from the function, which is hopefully already handled properly. This seems to date back to when the drivers were originally added, so backporting the fix to stable seems appropriate. There are other related realtek drivers in the kernel, but none of them contain a function with a similar name or produce this warning. Fixes: 1cc18a22b96b ("staging: r8188eu: Add files for new driver - part 5") Fixes: 2865d42c78a9 ("staging: r8712u: Add the new driver to the mainline kernel") Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/core/rtw_recv.c | 3 +++ drivers/staging/rtl8712/rtl871x_recv.c | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/staging/rtl8188eu/core/rtw_recv.c b/drivers/staging/rtl8188eu/core/rtw_recv.c index b87cbbbee054..b39fd1e9b4a0 100644 --- a/drivers/staging/rtl8188eu/core/rtw_recv.c +++ b/drivers/staging/rtl8188eu/core/rtw_recv.c @@ -1383,6 +1383,9 @@ static int wlanhdr_to_ethhdr(struct recv_frame *precvframe) ptr = recvframe_pull(precvframe, (rmv_len-sizeof(struct ethhdr) + (bsnaphdr ? 2 : 0))); } + if (!ptr) + return _FAIL; + memcpy(ptr, pattrib->dst, ETH_ALEN); memcpy(ptr+ETH_ALEN, pattrib->src, ETH_ALEN); diff --git a/drivers/staging/rtl8712/rtl871x_recv.c b/drivers/staging/rtl8712/rtl871x_recv.c index cbd2e51ba42b..cedf25b0b093 100644 --- a/drivers/staging/rtl8712/rtl871x_recv.c +++ b/drivers/staging/rtl8712/rtl871x_recv.c @@ -643,11 +643,16 @@ sint r8712_wlanhdr_to_ethhdr(union recv_frame *precvframe) /* append rx status for mp test packets */ ptr = recvframe_pull(precvframe, (rmv_len - sizeof(struct ethhdr) + 2) - 24); + if (!ptr) + return _FAIL; memcpy(ptr, get_rxmem(precvframe), 24); ptr += 24; - } else + } else { ptr = recvframe_pull(precvframe, (rmv_len - sizeof(struct ethhdr) + (bsnaphdr ? 2 : 0))); + if (!ptr) + return _FAIL; + } memcpy(ptr, pattrib->dst, ETH_ALEN); memcpy(ptr + ETH_ALEN, pattrib->src, ETH_ALEN); From 59cd503c8cfd3f69f8126165b274860bbbd058db Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 16 Jan 2017 18:00:00 +0000 Subject: [PATCH 036/154] coresight: STM: Balance enable/disable commit 4474f4c40a9c607c7317e686b23619b7b768004f upstream. The stm is automatically enabled when an application sets the policy via ->link() call back by using coresight_enable(), which keeps the refcount of the current users of the STM. However, the unlink() callback issues stm_disable() directly, which leaves the STM turned off, without the coresight layer knowing about it. This prevents any further uses of the STM hardware as the coresight layer still thinks the STM is turned on and doesn't enable the hardware when required. Even manually enabling the STM via sysfs can't really enable the hw. e.g, $ echo 1 > $CS_DEVS/$ETR/enable_sink $ mkdir -p $CONFIG_FS/stp-policy/$source.0/stm_test/ $ echo 32768 65535 > $CONFIG_FS/stp-policy/$source.0/stm_test/channels $ echo 64 > $CS_DEVS/$source/traceid $ ./stm_app Sending 64000 byte blocks of pattern 0 at 0us intervals Success to map channel(32768~32783) to 0xffffa95fa000 Sending on channel 32768 $ dd if=/dev/$ETR of=~/trace.bin.1 597+1 records in 597+1 records out 305920 bytes (306 kB) copied, 0.399952 s, 765 kB/s $ ./stm_app Sending 64000 byte blocks of pattern 0 at 0us intervals Success to map channel(32768~32783) to 0xffff7e9e2000 Sending on channel 32768 $ dd if=/dev/$ETR of=~/trace.bin.2 0+0 records in 0+0 records out 0 bytes (0 B) copied, 0.0232083 s, 0.0 kB/s Note that we don't get any data from the ETR for the second session. Also dmesg shows : [ 77.520458] coresight-tmc 20800000.etr: TMC-ETR enabled [ 77.537097] coresight-replicator etr_replicator@20890000: REPLICATOR enabled [ 77.558828] coresight-replicator main_replicator@208a0000: REPLICATOR enabled [ 77.581068] coresight-funnel 208c0000.main_funnel: FUNNEL inport 0 enabled [ 77.602217] coresight-tmc 20840000.etf: TMC-ETF enabled [ 77.618422] coresight-stm 20860000.stm: STM tracing enabled [ 139.554252] coresight-stm 20860000.stm: STM tracing disabled # End of first tracing session [ 146.351135] coresight-tmc 20800000.etr: TMC read start [ 146.514486] coresight-tmc 20800000.etr: TMC read end # Note that the STM is not turned on via stm_generic_link()->coresight_enable() # and hence none of the components are turned on. [ 152.479080] coresight-tmc 20800000.etr: TMC read start [ 152.542632] coresight-tmc 20800000.etr: TMC read end This patch fixes the problem by balancing the unlink operation by using the coresight_disable(), keeping the coresight layer in sync with the hardware state and thus allowing normal usage of the STM component. Fixes: commit 237483aa5cf43 ("coresight: stm: adding driver for CoreSight STM component") Cc: Pratik Patel Cc: Greg Kroah-Hartman Acked-by: Mathieu Poirier Reviewed-by: Chunyan Zhang Reported-by: Robert Walker Signed-off-by: Suzuki K Poulose Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-stm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c index 49e0f1b925a5..8e7905632d25 100644 --- a/drivers/hwtracing/coresight/coresight-stm.c +++ b/drivers/hwtracing/coresight/coresight-stm.c @@ -356,7 +356,7 @@ static void stm_generic_unlink(struct stm_data *stm_data, if (!drvdata || !drvdata->csdev) return; - stm_disable(drvdata->csdev, NULL); + coresight_disable(drvdata->csdev); } static phys_addr_t From 2f714ba1282aa6f5f0158329a1b2d3858698618c Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Tue, 14 Feb 2017 17:31:03 +0200 Subject: [PATCH 037/154] regulator: Fix regulator_summary for deviceless consumers commit e42a46b6f52473661ad192f76a128a68fe301df4 upstream. It is allowed to call regulator_get with a NULL dev argument (_regulator_get explicitly checks for it) but this causes an error later when printing /sys/kernel/debug/regulator_summary. Fix this by explicitly handling "deviceless" consumers in the debugfs code. Signed-off-by: Leonard Crestez Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 5c1519b229e0..9faccfceb53c 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -4357,12 +4357,13 @@ static void regulator_summary_show_subtree(struct seq_file *s, seq_puts(s, "\n"); list_for_each_entry(consumer, &rdev->consumer_list, list) { - if (consumer->dev->class == ®ulator_class) + if (consumer->dev && consumer->dev->class == ®ulator_class) continue; seq_printf(s, "%*s%-*s ", (level + 1) * 3 + 1, "", - 30 - (level + 1) * 3, dev_name(consumer->dev)); + 30 - (level + 1) * 3, + consumer->dev ? dev_name(consumer->dev) : "deviceless"); switch (rdev->desc->type) { case REGULATOR_VOLTAGE: From 84c2697c9cd3c4ed05945d80fdfd4b7e906dd074 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 7 Feb 2017 15:51:47 +0000 Subject: [PATCH 038/154] tpm_tis: fix the error handling of init_tis() commit 5939eaf4f9d432586dd2cdeea778506471e8088e upstream. Add the missing platform_driver_unregister() and remove the duplicate platform_device_unregister(force_pdev) in the error handling case. Fixes: 00194826e6be ("tpm_tis: Clean up the force=1 module parameter") Signed-off-by: Wei Yongjun Reviewed-by: Jason Gunthorpe Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_tis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index eaf5730d79eb..8022bea27fed 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -421,7 +421,7 @@ err_pnp: acpi_bus_unregister_driver(&tis_acpi_driver); err_acpi: #endif - platform_device_unregister(force_pdev); + platform_driver_unregister(&tis_drv); err_platform: if (force_pdev) platform_device_unregister(force_pdev); From 61cb3c6357fd77a2b1af49a9015955b9f82c68d4 Mon Sep 17 00:00:00 2001 From: CQ Tang Date: Mon, 30 Jan 2017 09:39:52 -0800 Subject: [PATCH 039/154] iommu/vt-d: Fix some macros that are incorrectly specified in intel-iommu commit aaa59306b0b7e0ca4ba92cc04c5db101cbb1c096 upstream. Some of the macros are incorrect with wrong bit-shifts resulting in picking the incorrect invalidation granularity. Incorrect Source-ID in extended devtlb invalidation caused device side errors. To: Joerg Roedel To: David Woodhouse Cc: iommu@lists.linux-foundation.org Cc: linux-kernel@vger.kernel.org Cc: CQ Tang Cc: Ashok Raj Fixes: 2f26e0a9 ("iommu/vt-d: Add basic SVM PASID support") Signed-off-by: CQ Tang Signed-off-by: Ashok Raj Tested-by: CQ Tang Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- include/linux/intel-iommu.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index d49e26c6cdc7..23e129ef6726 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -153,8 +153,8 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60) #define DMA_TLB_DSI_FLUSH (((u64)2) << 60) #define DMA_TLB_PSI_FLUSH (((u64)3) << 60) -#define DMA_TLB_IIRG(type) ((type >> 60) & 7) -#define DMA_TLB_IAIG(val) (((val) >> 57) & 7) +#define DMA_TLB_IIRG(type) ((type >> 60) & 3) +#define DMA_TLB_IAIG(val) (((val) >> 57) & 3) #define DMA_TLB_READ_DRAIN (((u64)1) << 49) #define DMA_TLB_WRITE_DRAIN (((u64)1) << 48) #define DMA_TLB_DID(id) (((u64)((id) & 0xffff)) << 32) @@ -164,9 +164,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) /* INVALID_DESC */ #define DMA_CCMD_INVL_GRANU_OFFSET 61 -#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 3) -#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 3) -#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 3) +#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 4) +#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 4) +#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 4) #define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7) #define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6) #define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16))) @@ -316,8 +316,8 @@ enum { #define QI_DEV_EIOTLB_SIZE (((u64)1) << 11) #define QI_DEV_EIOTLB_GLOB(g) ((u64)g) #define QI_DEV_EIOTLB_PASID(p) (((u64)p) << 32) -#define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32) -#define QI_DEV_EIOTLB_QDEP(qd) (((qd) & 0x1f) << 16) +#define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 16) +#define QI_DEV_EIOTLB_QDEP(qd) ((u64)((qd) & 0x1f) << 4) #define QI_DEV_EIOTLB_MAX_INVS 32 #define QI_PGRP_IDX(idx) (((u64)(idx)) << 55) From 24427cd71d2fbef3eeb087e802ff327c18c7275f Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Mon, 30 Jan 2017 09:39:53 -0800 Subject: [PATCH 040/154] iommu/vt-d: Tylersburg isoch identity map check is done too late. commit 21e722c4c8377b5bc82ad058fed12165af739c1b upstream. The check to set identity map for tylersburg is done too late. It needs to be done before the check for identity_map domain is done. To: Joerg Roedel To: David Woodhouse Cc: iommu@lists.linux-foundation.org Cc: linux-kernel@vger.kernel.org Cc: Ashok Raj Fixes: 86080ccc22 ("iommu/vt-d: Allocate si_domain in init_dmars()") Signed-off-by: Ashok Raj Reported-by: Yunhong Jiang Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel-iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index d82637ab09fd..34be95ee9038 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3325,13 +3325,14 @@ static int __init init_dmars(void) iommu_identity_mapping |= IDENTMAP_GFX; #endif + check_tylersburg_isoch(); + if (iommu_identity_mapping) { ret = si_domain_init(hw_pass_through); if (ret) goto free_iommu; } - check_tylersburg_isoch(); /* * If we copied translations from a previous kernel in the kdump From c06d74df4ebb9ab3fedcf6d623816944edd206f5 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 19 Jan 2017 13:53:15 -0800 Subject: [PATCH 041/154] CIFS: Fix splice read for non-cached files commit 9c25702cee1405099f982894c865c163de7909a8 upstream. Currently we call copy_page_to_iter() for uncached reading into a pipe. This is wrong because it treats pages as VFS cache pages and copies references rather than actual data. When we are trying to read from the pipe we end up calling page_cache_pipe_buf_confirm() which returns -ENODATA. This error is translated into 0 which is returned to a user. This issue is reproduced by running xfs-tests suite (generic test #249) against mount points with "cache=none". Fix it by mapping pages manually and calling copy_to_iter() that copies data into the pipe. Signed-off-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman --- fs/cifs/file.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 18a1e1d6671f..1cd0e2eefc66 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2884,7 +2884,15 @@ cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter) for (i = 0; i < rdata->nr_pages; i++) { struct page *page = rdata->pages[i]; size_t copy = min_t(size_t, remaining, PAGE_SIZE); - size_t written = copy_page_to_iter(page, 0, copy, iter); + size_t written; + + if (unlikely(iter->type & ITER_PIPE)) { + void *addr = kmap_atomic(page); + + written = copy_to_iter(addr, copy, iter); + kunmap_atomic(addr); + } else + written = copy_page_to_iter(page, 0, copy, iter); remaining -= written; if (written < copy && iov_iter_count(iter) > 0) break; From f1faaec4843a6bfcb795887f5b1a280fb874789d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 24 Feb 2017 14:55:45 -0800 Subject: [PATCH 042/154] mm, devm_memremap_pages: hold device_hotplug lock over mem_hotplug_{begin, done} commit b5d24fda9c3dce51fcb4eee459550a458eaaf1e2 upstream. The mem_hotplug_{begin,done} lock coordinates with {get,put}_online_mems() to hold off "readers" of the current state of memory from new hotplug actions. mem_hotplug_begin() expects exclusive access, via the device_hotplug lock, to set mem_hotplug.active_writer. Calling mem_hotplug_begin() without locking device_hotplug can lead to corrupting mem_hotplug.refcount and missed wakeups / soft lockups. [dan.j.williams@intel.com: v2] Link: http://lkml.kernel.org/r/148728203365.38457.17804568297887708345.stgit@dwillia2-desk3.amr.corp.intel.com Link: http://lkml.kernel.org/r/148693885680.16345.17802627926777862337.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: f931ab479dd2 ("mm: fix devm_memremap_pages crash, use mem_hotplug_{begin, done}") Signed-off-by: Dan Williams Reported-by: Ben Hutchings Cc: Michal Hocko Cc: Toshi Kani Cc: Vlastimil Babka Cc: Logan Gunthorpe Cc: Masayoshi Mizuma Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/memremap.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/memremap.c b/kernel/memremap.c index 9ecedc28b928..06123234f118 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -246,9 +246,13 @@ static void devm_memremap_pages_release(struct device *dev, void *data) /* pages are dead and unused, undo the arch mapping */ align_start = res->start & ~(SECTION_SIZE - 1); align_size = ALIGN(resource_size(res), SECTION_SIZE); + + lock_device_hotplug(); mem_hotplug_begin(); arch_remove_memory(align_start, align_size); mem_hotplug_done(); + unlock_device_hotplug(); + untrack_pfn(NULL, PHYS_PFN(align_start), align_size); pgmap_radix_release(res); dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc, @@ -360,9 +364,11 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (error) goto err_pfn_remap; + lock_device_hotplug(); mem_hotplug_begin(); error = arch_add_memory(nid, align_start, align_size, true); mem_hotplug_done(); + unlock_device_hotplug(); if (error) goto err_add_memory; From d1e8042628a31e28c8e92411ab83a28bd5e3f7c9 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 24 Feb 2017 14:59:33 -0800 Subject: [PATCH 043/154] mm/page_alloc: fix nodes for reclaim in fast path commit e02dc017c3032dcdce1b993af0db135462e1b4b7 upstream. When @node_reclaim_node isn't 0, the page allocator tries to reclaim pages if the amount of free memory in the zones are below the low watermark. On Power platform, none of NUMA nodes are scanned for page reclaim because no nodes match the condition in zone_allows_reclaim(). On Power platform, RECLAIM_DISTANCE is set to 10 which is the distance of Node-A to Node-A. So the preferred node even won't be scanned for page reclaim. __alloc_pages_nodemask() get_page_from_freelist() zone_allows_reclaim() Anton proposed the test code as below: # cat alloc.c : int main(int argc, char *argv[]) { void *p; unsigned long size; unsigned long start, end; start = time(NULL); size = strtoul(argv[1], NULL, 0); printf("To allocate %ldGB memory\n", size); size <<= 30; p = malloc(size); assert(p); memset(p, 0, size); end = time(NULL); printf("Used time: %ld seconds\n", end - start); sleep(3600); return 0; } The system I use for testing has two NUMA nodes. Both have 128GB memory. In below scnario, the page caches on node#0 should be reclaimed when it encounters pressure to accommodate request of allocation. # echo 2 > /proc/sys/vm/zone_reclaim_mode; \ sync; \ echo 3 > /proc/sys/vm/drop_caches; \ # taskset -c 0 cat file.32G > /dev/null; \ grep FilePages /sys/devices/system/node/node0/meminfo Node 0 FilePages: 33619712 kB # taskset -c 0 ./alloc 128 # grep FilePages /sys/devices/system/node/node0/meminfo Node 0 FilePages: 33619840 kB # grep MemFree /sys/devices/system/node/node0/meminfo Node 0 MemFree: 186816 kB With the patch applied, the pagecache on node-0 is reclaimed when its free memory is running out. It's the expected behaviour. # echo 2 > /proc/sys/vm/zone_reclaim_mode; \ sync; \ echo 3 > /proc/sys/vm/drop_caches # taskset -c 0 cat file.32G > /dev/null; \ grep FilePages /sys/devices/system/node/node0/meminfo Node 0 FilePages: 33605568 kB # taskset -c 0 ./alloc 128 # grep FilePages /sys/devices/system/node/node0/meminfo Node 0 FilePages: 1379520 kB # grep MemFree /sys/devices/system/node/node0/meminfo Node 0 MemFree: 317120 kB Fixes: 5f7a75acdb24 ("mm: page_alloc: do not cache reclaim distances") Link: http://lkml.kernel.org/r/1486532455-29613-1-git-send-email-gwshan@linux.vnet.ibm.com Signed-off-by: Gavin Shan Acked-by: Mel Gorman Acked-by: Michal Hocko Cc: Anton Blanchard Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f4a02e240fb6..1460e6ad5e14 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2858,7 +2858,7 @@ bool zone_watermark_ok_safe(struct zone *z, unsigned int order, #ifdef CONFIG_NUMA static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) { - return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) < + return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <= RECLAIM_DISTANCE; } #else /* CONFIG_NUMA */ From 58d1dbb904ba9ce1f8ac99c5d210ad7bb14eed08 Mon Sep 17 00:00:00 2001 From: Vinayak Menon Date: Fri, 24 Feb 2017 14:59:39 -0800 Subject: [PATCH 044/154] mm: vmpressure: fix sending wrong events on underflow commit e1587a4945408faa58d0485002c110eb2454740c upstream. At the end of a window period, if the reclaimed pages is greater than scanned, an unsigned underflow can result in a huge pressure value and thus a critical event. Reclaimed pages is found to go higher than scanned because of the addition of reclaimed slab pages to reclaimed in shrink_node without a corresponding increment to scanned pages. Minchan Kim mentioned that this can also happen in the case of a THP page where the scanned is 1 and reclaimed could be 512. Link: http://lkml.kernel.org/r/1486641577-11685-1-git-send-email-vinmenon@codeaurora.org Signed-off-by: Vinayak Menon Acked-by: Minchan Kim Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Mel Gorman Cc: Vlastimil Babka Cc: Rik van Riel Cc: Vladimir Davydov Cc: Anton Vorontsov Cc: Shiraz Hashim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/vmpressure.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/mm/vmpressure.c b/mm/vmpressure.c index 149fdf6c5c56..6063581f705c 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c @@ -112,8 +112,15 @@ static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned, unsigned long reclaimed) { unsigned long scale = scanned + reclaimed; - unsigned long pressure; + unsigned long pressure = 0; + /* + * reclaimed can be greater than scanned in cases + * like THP, where the scanned is 1 and reclaimed + * could be 512 + */ + if (reclaimed >= scanned) + goto out; /* * We calculate the ratio (in percents) of how many pages were * scanned vs. reclaimed in a given time frame (window). Note that @@ -124,6 +131,7 @@ static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned, pressure = scale - (reclaimed * scale / scanned); pressure = pressure * 100 / scale; +out: pr_debug("%s: %3lu (s: %lu r: %lu)\n", __func__, pressure, scanned, reclaimed); From 2c290eede9b6375ad15025aa85a7c07c3ce1a3f3 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Fri, 24 Feb 2017 14:59:59 -0800 Subject: [PATCH 045/154] mm: do not access page->mapping directly on page_endio commit dd8416c47715cf324c9a16f13273f9fda87acfed upstream. With rw_page, page_endio is used for completing IO on a page and it propagates write error to the address space if the IO fails. The problem is it accesses page->mapping directly which might be okay for file-backed pages but it shouldn't for anonymous page. Otherwise, it can corrupt one of field from anon_vma under us and system goes panic randomly. swap_writepage bdev_writepage ops->rw_page I encountered the BUG during developing new zram feature and it was really hard to figure it out because it made random crash, somtime mmap_sem lockdep, sometime other places where places never related to zram/zsmalloc, and not reproducible with some configuration. When I consider how that bug is subtle and people do fast-swap test with brd, it's worth to add stable mark, I think. Fixes: dd6bd0d9c7db ("swap: use bdev_read_page() / bdev_write_page()") Signed-off-by: Minchan Kim Acked-by: Michal Hocko Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/filemap.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index d8d7df82c69a..edfb90e3830c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -910,9 +910,12 @@ void page_endio(struct page *page, bool is_write, int err) unlock_page(page); } else { if (err) { + struct address_space *mapping; + SetPageError(page); - if (page->mapping) - mapping_set_error(page->mapping, err); + mapping = page_mapping(page); + if (mapping) + mapping_set_error(mapping, err); } end_page_writeback(page); } From 8f6620e391a43d5da24fa09f98d9b51016545470 Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Fri, 24 Feb 2017 15:00:40 -0800 Subject: [PATCH 046/154] mm balloon: umount balloon_mnt when removing vb device commit 9c57b5808c625f4fc93da330b932647eaff321f7 upstream. With CONFIG_BALLOON_COMPACTION=y the kernel will mount balloon_mnt for balloon page migration when we probe a virtio_balloon device. However we do not unmount it when removing the device. Fix this. Fixes: b1123ea6d3b3 ("mm: balloon: use general non-lru movable page feature") Link: http://lkml.kernel.org/r/1486531318-35189-1-git-send-email-xieyisheng1@huawei.com Signed-off-by: Yisheng Xie Acked-by: Minchan Kim Cc: Rafael Aquini Cc: Konstantin Khlebnikov Cc: Gioh Kim Cc: Vlastimil Babka Cc: Michal Hocko Cc: Michael S. Tsirkin Cc: Jason Wang Cc: Hanjun Guo Cc: Xishi Qiu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio_balloon.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 181793f07852..9d2738e9217f 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -615,8 +615,12 @@ static void virtballoon_remove(struct virtio_device *vdev) cancel_work_sync(&vb->update_balloon_stats_work); remove_common(vb); +#ifdef CONFIG_BALLOON_COMPACTION if (vb->vb_dev_info.inode) iput(vb->vb_dev_info.inode); + + kern_unmount(balloon_mnt); +#endif kfree(vb); } From 710531320af876192d76b2c1f68190a1df941b02 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 22 Feb 2017 15:45:58 -0800 Subject: [PATCH 047/154] mm, vmscan: cleanup lru size claculations commit fd538803731e50367b7c59ce4ad3454426a3d671 upstream. lruvec_lru_size returns the full size of the LRU list while we sometimes need a value reduced only to eligible zones (e.g. for lowmem requests). inactive_list_is_low is one such user. Later patches will add more of them. Add a new parameter to lruvec_lru_size and allow it filter out zones which are not eligible for the given context. Link: http://lkml.kernel.org/r/20170117103702.28542-2-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Johannes Weiner Acked-by: Hillf Danton Acked-by: Minchan Kim Acked-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/mmzone.h | 2 +- mm/vmscan.c | 81 ++++++++++++++++++++---------------------- mm/workingset.c | 2 +- 3 files changed, 41 insertions(+), 44 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index f99c993dd500..7e273e243a13 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -779,7 +779,7 @@ static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec) #endif } -extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru); +extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx); #ifdef CONFIG_HAVE_MEMORY_PRESENT void memory_present(int nid, unsigned long start, unsigned long end); diff --git a/mm/vmscan.c b/mm/vmscan.c index fa30010a5277..cd516c632e8f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -234,22 +234,39 @@ bool pgdat_reclaimable(struct pglist_data *pgdat) pgdat_reclaimable_pages(pgdat) * 6; } -unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru) +/** + * lruvec_lru_size - Returns the number of pages on the given LRU list. + * @lruvec: lru vector + * @lru: lru to use + * @zone_idx: zones to consider (use MAX_NR_ZONES for the whole LRU list) + */ +unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx) { + unsigned long lru_size; + int zid; + if (!mem_cgroup_disabled()) - return mem_cgroup_get_lru_size(lruvec, lru); + lru_size = mem_cgroup_get_lru_size(lruvec, lru); + else + lru_size = node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru); - return node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru); -} + for (zid = zone_idx + 1; zid < MAX_NR_ZONES; zid++) { + struct zone *zone = &lruvec_pgdat(lruvec)->node_zones[zid]; + unsigned long size; -unsigned long lruvec_zone_lru_size(struct lruvec *lruvec, enum lru_list lru, - int zone_idx) -{ - if (!mem_cgroup_disabled()) - return mem_cgroup_get_zone_lru_size(lruvec, lru, zone_idx); + if (!managed_zone(zone)) + continue; + + if (!mem_cgroup_disabled()) + size = mem_cgroup_get_zone_lru_size(lruvec, lru, zid); + else + size = zone_page_state(&lruvec_pgdat(lruvec)->node_zones[zid], + NR_ZONE_LRU_BASE + lru); + lru_size -= min(size, lru_size); + } + + return lru_size; - return zone_page_state(&lruvec_pgdat(lruvec)->node_zones[zone_idx], - NR_ZONE_LRU_BASE + lru); } /* @@ -2028,11 +2045,10 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, struct scan_control *sc) { unsigned long inactive_ratio; - unsigned long inactive; - unsigned long active; + unsigned long inactive, active; + enum lru_list inactive_lru = file * LRU_FILE; + enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE; unsigned long gb; - struct pglist_data *pgdat = lruvec_pgdat(lruvec); - int zid; /* * If we don't have swap space, anonymous page deactivation @@ -2041,27 +2057,8 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, if (!file && !total_swap_pages) return false; - inactive = lruvec_lru_size(lruvec, file * LRU_FILE); - active = lruvec_lru_size(lruvec, file * LRU_FILE + LRU_ACTIVE); - - /* - * For zone-constrained allocations, it is necessary to check if - * deactivations are required for lowmem to be reclaimed. This - * calculates the inactive/active pages available in eligible zones. - */ - for (zid = sc->reclaim_idx + 1; zid < MAX_NR_ZONES; zid++) { - struct zone *zone = &pgdat->node_zones[zid]; - unsigned long inactive_zone, active_zone; - - if (!managed_zone(zone)) - continue; - - inactive_zone = lruvec_zone_lru_size(lruvec, file * LRU_FILE, zid); - active_zone = lruvec_zone_lru_size(lruvec, (file * LRU_FILE) + LRU_ACTIVE, zid); - - inactive -= min(inactive, inactive_zone); - active -= min(active, active_zone); - } + inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx); + active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx); gb = (inactive + active) >> (30 - PAGE_SHIFT); if (gb) @@ -2208,7 +2205,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * system is under heavy pressure. */ if (!inactive_list_is_low(lruvec, true, sc) && - lruvec_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority) { + lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES) >> sc->priority) { scan_balance = SCAN_FILE; goto out; } @@ -2234,10 +2231,10 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * anon in [0], file in [1] */ - anon = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON) + - lruvec_lru_size(lruvec, LRU_INACTIVE_ANON); - file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE) + - lruvec_lru_size(lruvec, LRU_INACTIVE_FILE); + anon = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, MAX_NR_ZONES) + + lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, MAX_NR_ZONES); + file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES) + + lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES); spin_lock_irq(&pgdat->lru_lock); if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { @@ -2275,7 +2272,7 @@ out: unsigned long size; unsigned long scan; - size = lruvec_lru_size(lruvec, lru); + size = lruvec_lru_size(lruvec, lru, MAX_NR_ZONES); scan = size >> sc->priority; if (!scan && pass && force_scan) diff --git a/mm/workingset.c b/mm/workingset.c index fb1f9183d89a..33f6f4db32fd 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -266,7 +266,7 @@ bool workingset_refault(void *shadow) } lruvec = mem_cgroup_lruvec(pgdat, memcg); refault = atomic_long_read(&lruvec->inactive_age); - active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE); + active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES); rcu_read_unlock(); /* From 521e92b198a89d3186930578e036d3b58a767081 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 22 Feb 2017 15:46:01 -0800 Subject: [PATCH 048/154] mm, vmscan: consider eligible zones in get_scan_count commit 71ab6cfe88dcf9f6e6a65eb85cf2bda20a257682 upstream. get_scan_count() considers the whole node LRU size when - doing SCAN_FILE due to many page cache inactive pages - calculating the number of pages to scan In both cases this might lead to unexpected behavior especially on 32b systems where we can expect lowmem memory pressure very often. A large highmem zone can easily distort SCAN_FILE heuristic because there might be only few file pages from the eligible zones on the node lru and we would still enforce file lru scanning which can lead to trashing while we could still scan anonymous pages. The later use of lruvec_lru_size can be problematic as well. Especially when there are not many pages from the eligible zones. We would have to skip over many pages to find anything to reclaim but shrink_node_memcg would only reduce the remaining number to scan by SWAP_CLUSTER_MAX at maximum. Therefore we can end up going over a large LRU many times without actually having chance to reclaim much if anything at all. The closer we are out of memory on lowmem zone the worse the problem will be. Fix this by filtering out all the ineligible zones when calculating the lru size for both paths and consider only sc->reclaim_idx zones. The patch would need to be tweaked a bit to apply to 4.10 and older but I will do that as soon as it hits the Linus tree in the next merge window. Link: http://lkml.kernel.org/r/20170117103702.28542-3-mhocko@kernel.org Fixes: b2e18757f2c9 ("mm, vmscan: begin reclaiming pages on a per-node basis") Signed-off-by: Michal Hocko Tested-by: Trevor Cordes Acked-by: Minchan Kim Acked-by: Hillf Danton Acked-by: Mel Gorman Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/vmscan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index cd516c632e8f..30a88b945a44 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2205,7 +2205,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * system is under heavy pressure. */ if (!inactive_list_is_low(lruvec, true, sc) && - lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES) >> sc->priority) { + lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) { scan_balance = SCAN_FILE; goto out; } @@ -2272,7 +2272,7 @@ out: unsigned long size; unsigned long scan; - size = lruvec_lru_size(lruvec, lru, MAX_NR_ZONES); + size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx); scan = size >> sc->priority; if (!scan && pass && force_scan) From 6d94a6b32e97c56dfbe96cc489517b338608f091 Mon Sep 17 00:00:00 2001 From: Stas Sergeev Date: Mon, 27 Feb 2017 14:27:25 -0800 Subject: [PATCH 049/154] sigaltstack: support SS_AUTODISARM for CONFIG_COMPAT commit 441398d378f29a5ad6d0fcda07918e54e4961800 upstream. Currently SS_AUTODISARM is not supported in compatibility mode, but does not return -EINVAL either. This makes dosemu built with -m32 on x86_64 to crash. Also the kernel's sigaltstack selftest fails if compiled with -m32. This patch adds the needed support. Link: http://lkml.kernel.org/r/20170205101213.8163-2-stsp@list.ru Signed-off-by: Stas Sergeev Cc: Milosz Tanski Cc: Andy Lutomirski Cc: Al Viro Cc: Arnd Bergmann Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Oleg Nesterov Cc: Nicolas Pitre Cc: Waiman Long Cc: Dave Hansen Cc: Dmitry Safonov Cc: Wang Xiaoqiang Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/compat.h | 4 +++- kernel/signal.c | 11 +++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/include/linux/compat.h b/include/linux/compat.h index 63609398ef9f..d8535a430caf 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -711,8 +711,10 @@ int __compat_save_altstack(compat_stack_t __user *, unsigned long); compat_stack_t __user *__uss = uss; \ struct task_struct *t = current; \ put_user_ex(ptr_to_compat((void __user *)t->sas_ss_sp), &__uss->ss_sp); \ - put_user_ex(sas_ss_flags(sp), &__uss->ss_flags); \ + put_user_ex(t->sas_ss_flags, &__uss->ss_flags); \ put_user_ex(t->sas_ss_size, &__uss->ss_size); \ + if (t->sas_ss_flags & SS_AUTODISARM) \ + sas_ss_reset(t); \ } while (0); asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, diff --git a/kernel/signal.c b/kernel/signal.c index 75761acc77cf..0b1415720a15 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3226,10 +3226,17 @@ int compat_restore_altstack(const compat_stack_t __user *uss) int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp) { + int err; struct task_struct *t = current; - return __put_user(ptr_to_compat((void __user *)t->sas_ss_sp), &uss->ss_sp) | - __put_user(sas_ss_flags(sp), &uss->ss_flags) | + err = __put_user(ptr_to_compat((void __user *)t->sas_ss_sp), + &uss->ss_sp) | + __put_user(t->sas_ss_flags, &uss->ss_flags) | __put_user(t->sas_ss_size, &uss->ss_size); + if (err) + return err; + if (t->sas_ss_flags & SS_AUTODISARM) + sas_ss_reset(t); + return 0; } #endif From 270e84a1e6effd6c0c6e9b13b196b5fdaa392954 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 27 Feb 2017 14:28:24 -0800 Subject: [PATCH 050/154] ipc/shm: Fix shmat mmap nil-page protection commit 95e91b831f87ac8e1f8ed50c14d709089b4e01b8 upstream. The issue is described here, with a nice testcase: https://bugzilla.kernel.org/show_bug.cgi?id=192931 The problem is that shmat() calls do_mmap_pgoff() with MAP_FIXED, and the address rounded down to 0. For the regular mmap case, the protection mentioned above is that the kernel gets to generate the address -- arch_get_unmapped_area() will always check for MAP_FIXED and return that address. So by the time we do security_mmap_addr(0) things get funky for shmat(). The testcase itself shows that while a regular user crashes, root will not have a problem attaching a nil-page. There are two possible fixes to this. The first, and which this patch does, is to simply allow root to crash as well -- this is also regular mmap behavior, ie when hacking up the testcase and adding mmap(... |MAP_FIXED). While this approach is the safer option, the second alternative is to ignore SHM_RND if the rounded address is 0, thus only having MAP_SHARED flags. This makes the behavior of shmat() identical to the mmap() case. The downside of this is obviously user visible, but does make sense in that it maintains semantics after the round-down wrt 0 address and mmap. Passes shm related ltp tests. Link: http://lkml.kernel.org/r/1486050195-18629-1-git-send-email-dave@stgolabs.net Signed-off-by: Davidlohr Bueso Reported-by: Gareth Evans Cc: Manfred Spraul Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- ipc/shm.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/ipc/shm.c b/ipc/shm.c index dbac8860c721..e2072ae4f90e 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -1085,8 +1085,8 @@ out_unlock1: * "raddr" thing points to kernel space, and there has to be a wrapper around * this. */ -long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, - unsigned long shmlba) +long do_shmat(int shmid, char __user *shmaddr, int shmflg, + ulong *raddr, unsigned long shmlba) { struct shmid_kernel *shp; unsigned long addr; @@ -1107,8 +1107,13 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, goto out; else if ((addr = (ulong)shmaddr)) { if (addr & (shmlba - 1)) { - if (shmflg & SHM_RND) - addr &= ~(shmlba - 1); /* round down */ + /* + * Round down to the nearest multiple of shmlba. + * For sane do_mmap_pgoff() parameters, avoid + * round downs that trigger nil-page and MAP_FIXED. + */ + if ((shmflg & SHM_RND) && addr >= shmlba) + addr &= ~(shmlba - 1); else #ifndef __ARCH_FORCE_SHMLBA if (addr & ~PAGE_MASK) From d9cc31683a16f5619217d80c3d8e608c23c41afc Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Tue, 17 Jan 2017 06:45:41 -0500 Subject: [PATCH 051/154] ima: fix ima_d_path() possible race with rename commit bc15ed663e7e53ee4dc3e60f8d09c93a0528c694 upstream. On failure to return a pathname from ima_d_path(), a pointer to dname is returned, which is subsequently used in the IMA measurement list, the IMA audit records, and other audit logging. Saving the pointer to dname for later use has the potential to race with rename. Intead of returning a pointer to dname on failure, this patch returns a pointer to a copy of the filename. Reported-by: Al Viro Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman --- security/integrity/ima/ima.h | 2 +- security/integrity/ima/ima_api.c | 20 ++++++++++++++++++-- security/integrity/ima/ima_main.c | 8 +++++--- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index db25f54a04fe..df7834aa1b8f 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -173,7 +173,7 @@ int ima_store_template(struct ima_template_entry *entry, int violation, struct inode *inode, const unsigned char *filename, int pcr); void ima_free_template_entry(struct ima_template_entry *entry); -const char *ima_d_path(const struct path *path, char **pathbuf); +const char *ima_d_path(const struct path *path, char **pathbuf, char *filename); /* IMA policy related functions */ int ima_match_policy(struct inode *inode, enum ima_hooks func, int mask, diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c index 9df26a2b75ba..d01a52f8f708 100644 --- a/security/integrity/ima/ima_api.c +++ b/security/integrity/ima/ima_api.c @@ -318,7 +318,17 @@ void ima_audit_measurement(struct integrity_iint_cache *iint, iint->flags |= IMA_AUDITED; } -const char *ima_d_path(const struct path *path, char **pathbuf) +/* + * ima_d_path - return a pointer to the full pathname + * + * Attempt to return a pointer to the full pathname for use in the + * IMA measurement list, IMA audit records, and auditing logs. + * + * On failure, return a pointer to a copy of the filename, not dname. + * Returning a pointer to dname, could result in using the pointer + * after the memory has been freed. + */ +const char *ima_d_path(const struct path *path, char **pathbuf, char *namebuf) { char *pathname = NULL; @@ -331,5 +341,11 @@ const char *ima_d_path(const struct path *path, char **pathbuf) pathname = NULL; } } - return pathname ?: (const char *)path->dentry->d_name.name; + + if (!pathname) { + strlcpy(namebuf, path->dentry->d_name.name, NAME_MAX); + pathname = namebuf; + } + + return pathname; } diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 423d111b3b94..0e8762945e79 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -83,6 +83,7 @@ static void ima_rdwr_violation_check(struct file *file, const char **pathname) { struct inode *inode = file_inode(file); + char filename[NAME_MAX]; fmode_t mode = file->f_mode; bool send_tomtou = false, send_writers = false; @@ -102,7 +103,7 @@ static void ima_rdwr_violation_check(struct file *file, if (!send_tomtou && !send_writers) return; - *pathname = ima_d_path(&file->f_path, pathbuf); + *pathname = ima_d_path(&file->f_path, pathbuf, filename); if (send_tomtou) ima_add_violation(file, *pathname, iint, @@ -161,6 +162,7 @@ static int process_measurement(struct file *file, char *buf, loff_t size, struct integrity_iint_cache *iint = NULL; struct ima_template_desc *template_desc; char *pathbuf = NULL; + char filename[NAME_MAX]; const char *pathname = NULL; int rc = -ENOMEM, action, must_appraise; int pcr = CONFIG_IMA_MEASURE_PCR_IDX; @@ -239,8 +241,8 @@ static int process_measurement(struct file *file, char *buf, loff_t size, goto out_digsig; } - if (!pathname) /* ima_rdwr_violation possibly pre-fetched */ - pathname = ima_d_path(&file->f_path, &pathbuf); + if (!pathbuf) /* ima_rdwr_violation possibly pre-fetched */ + pathname = ima_d_path(&file->f_path, &pathbuf, filename); if (action & IMA_MEASURE) ima_store_measurement(iint, file, pathname, From 2294b771a4b425d9365aabeaff8d33ccd0c0fffe Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Tue, 31 Jan 2017 15:38:16 +0900 Subject: [PATCH 052/154] PM / devfreq: Fix available_governor sysfs commit bcf23c79c4e46130701370af4383b61a3cba755c upstream. The devfreq using passive governor is not able to change the governor. So, the user can not change the governor through 'available_governor' sysfs entry. Also, the devfreq which don't use the passive governor is not able to change to 'passive' governor on the fly. Fixes: 996133119f57 ("PM / devfreq: Add new passive governor") Signed-off-by: Chanwoo Choi Signed-off-by: MyungJoo Ham Signed-off-by: Greg Kroah-Hartman --- drivers/devfreq/devfreq.c | 31 ++++++++++++++++++++++++++---- drivers/devfreq/governor_passive.c | 1 + include/linux/devfreq.h | 3 +++ 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 712592cef1a2..2eacfb4b905f 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -939,6 +939,9 @@ static ssize_t governor_store(struct device *dev, struct device_attribute *attr, if (df->governor == governor) { ret = 0; goto out; + } else if (df->governor->immutable || governor->immutable) { + ret = -EINVAL; + goto out; } if (df->governor) { @@ -968,13 +971,33 @@ static ssize_t available_governors_show(struct device *d, struct device_attribute *attr, char *buf) { - struct devfreq_governor *tmp_governor; + struct devfreq *df = to_devfreq(d); ssize_t count = 0; mutex_lock(&devfreq_list_lock); - list_for_each_entry(tmp_governor, &devfreq_governor_list, node) - count += scnprintf(&buf[count], (PAGE_SIZE - count - 2), - "%s ", tmp_governor->name); + + /* + * The devfreq with immutable governor (e.g., passive) shows + * only own governor. + */ + if (df->governor->immutable) { + count = scnprintf(&buf[count], DEVFREQ_NAME_LEN, + "%s ", df->governor_name); + /* + * The devfreq device shows the registered governor except for + * immutable governors such as passive governor . + */ + } else { + struct devfreq_governor *governor; + + list_for_each_entry(governor, &devfreq_governor_list, node) { + if (governor->immutable) + continue; + count += scnprintf(&buf[count], (PAGE_SIZE - count - 2), + "%s ", governor->name); + } + } + mutex_unlock(&devfreq_list_lock); /* Truncate the trailing space */ diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c index 9ef46e2592c4..93795b32dc09 100644 --- a/drivers/devfreq/governor_passive.c +++ b/drivers/devfreq/governor_passive.c @@ -179,6 +179,7 @@ static int devfreq_passive_event_handler(struct devfreq *devfreq, static struct devfreq_governor devfreq_passive = { .name = "passive", + .immutable = 1, .get_target_freq = devfreq_passive_get_target_freq, .event_handler = devfreq_passive_event_handler, }; diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 2de4e2eea180..e0acb0e5243b 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -104,6 +104,8 @@ struct devfreq_dev_profile { * struct devfreq_governor - Devfreq policy governor * @node: list node - contains registered devfreq governors * @name: Governor's name + * @immutable: Immutable flag for governor. If the value is 1, + * this govenror is never changeable to other governor. * @get_target_freq: Returns desired operating frequency for the device. * Basically, get_target_freq will run * devfreq_dev_profile.get_dev_status() to get the @@ -121,6 +123,7 @@ struct devfreq_governor { struct list_head node; const char name[DEVFREQ_NAME_LEN]; + const unsigned int immutable; int (*get_target_freq)(struct devfreq *this, unsigned long *freq); int (*event_handler)(struct devfreq *devfreq, unsigned int event, void *data); From fe8f92c7beba2f1c6b1235e52e199eafbe4aa39a Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Tue, 31 Jan 2017 15:38:17 +0900 Subject: [PATCH 053/154] PM / devfreq: Fix wrong trans_stat of passive devfreq device commit 30582c25a4b4e0a5e456a309fde79b845e9473b2 upstream. Until now, the trans_stat information of passive devfreq is not updated. This patch updates the trans_stat information after setting the target frequency of passive devfreq device. Fixes: 996133119f57 ("PM / devfreq: Add new passive governor") Signed-off-by: Chanwoo Choi Signed-off-by: MyungJoo Ham Signed-off-by: Greg Kroah-Hartman --- drivers/devfreq/devfreq.c | 3 ++- drivers/devfreq/governor.h | 2 ++ drivers/devfreq/governor_passive.c | 5 +++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 2eacfb4b905f..7309c0824887 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -130,7 +130,7 @@ static void devfreq_set_freq_table(struct devfreq *devfreq) * @devfreq: the devfreq instance * @freq: the update target frequency */ -static int devfreq_update_status(struct devfreq *devfreq, unsigned long freq) +int devfreq_update_status(struct devfreq *devfreq, unsigned long freq) { int lev, prev_lev, ret = 0; unsigned long cur_time; @@ -166,6 +166,7 @@ out: devfreq->last_stat_updated = cur_time; return ret; } +EXPORT_SYMBOL(devfreq_update_status); /** * find_devfreq_governor() - find devfreq governor from name diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h index fad7d6321978..71576b8bdfef 100644 --- a/drivers/devfreq/governor.h +++ b/drivers/devfreq/governor.h @@ -38,4 +38,6 @@ extern void devfreq_interval_update(struct devfreq *devfreq, extern int devfreq_add_governor(struct devfreq_governor *governor); extern int devfreq_remove_governor(struct devfreq_governor *governor); +extern int devfreq_update_status(struct devfreq *devfreq, unsigned long freq); + #endif /* _GOVERNOR_H */ diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c index 93795b32dc09..5be96b2249e7 100644 --- a/drivers/devfreq/governor_passive.c +++ b/drivers/devfreq/governor_passive.c @@ -112,6 +112,11 @@ static int update_devfreq_passive(struct devfreq *devfreq, unsigned long freq) if (ret < 0) goto out; + if (devfreq->profile->freq_table + && (devfreq_update_status(devfreq, freq))) + dev_err(&devfreq->dev, + "Couldn't update frequency transition information.\n"); + devfreq->previous_freq = freq; out: From 9987feba902cbd6429cf3b3a3b6614aa23a555c2 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Thu, 9 Feb 2017 11:46:18 -0500 Subject: [PATCH 054/154] dm cache: fix corruption seen when using cache > 2TB commit ca763d0a53b264a650342cee206512bc92ac7050 upstream. A rounding bug due to compiler generated temporary being 32bit was found in remap_to_cache(). A localized cast in remap_to_cache() fixes the corruption but this preferred fix (changing from uint32_t to sector_t) eliminates potential for future rounding errors elsewhere. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-target.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 59b2c50562e4..c817627d09ca 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -248,7 +248,7 @@ struct cache { /* * Fields for converting from sectors to blocks. */ - uint32_t sectors_per_block; + sector_t sectors_per_block; int sectors_per_block_shift; spinlock_t lock; @@ -3546,11 +3546,11 @@ static void cache_status(struct dm_target *ti, status_type_t type, residency = policy_residency(cache->policy); - DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ", + DMEMIT("%u %llu/%llu %llu %llu/%llu %u %u %u %u %u %u %lu ", (unsigned)DM_CACHE_METADATA_BLOCK_SIZE, (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata), (unsigned long long)nr_blocks_metadata, - cache->sectors_per_block, + (unsigned long long)cache->sectors_per_block, (unsigned long long) from_cblock(residency), (unsigned long long) from_cblock(cache->cache_size), (unsigned) atomic_read(&cache->stats.read_hit), From bad6c16b81b68d81e95ada246649dacc3cdd5e1d Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 15 Feb 2017 12:06:19 -0500 Subject: [PATCH 055/154] dm stats: fix a leaked s->histogram_boundaries array commit 6085831883c25860264721df15f05bbded45e2a2 upstream. Fixes: dfcfac3e4cd9 ("dm stats: collect and report histogram of IO latencies") Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-stats.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index 38b05f23b96c..0250e7e521ab 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -175,6 +175,7 @@ static void dm_stat_free(struct rcu_head *head) int cpu; struct dm_stat *s = container_of(head, struct dm_stat, rcu_head); + kfree(s->histogram_boundaries); kfree(s->program_id); kfree(s->aux_data); for_each_possible_cpu(cpu) { From b7f874eedc9308a43b0d29e2a12f848be724fca7 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 16 Feb 2017 23:57:17 -0500 Subject: [PATCH 056/154] dm round robin: revert "use percpu 'repeat_count' and 'current_path'" commit 37a098e9d10db6e2efc05fe61e3a6ff2e9802c53 upstream. The sloppy nature of lockless access to percpu pointers (s->current_path) in rr_select_path(), from multiple threads, is causing some paths to used more than others -- which results in less IO performance being observed. Revert these upstream commits to restore truly symmetric round-robin IO submission in DM multipath: b0b477c dm round robin: use percpu 'repeat_count' and 'current_path' 802934b dm round robin: do not use this_cpu_ptr() without having preemption disabled There is no benefit to all this complexity if repeat_count = 1 (which is the recommended default). Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-round-robin.c | 67 ++++++++----------------------------- 1 file changed, 14 insertions(+), 53 deletions(-) diff --git a/drivers/md/dm-round-robin.c b/drivers/md/dm-round-robin.c index 6c25213ab38c..bdbb7e6e8212 100644 --- a/drivers/md/dm-round-robin.c +++ b/drivers/md/dm-round-robin.c @@ -17,8 +17,8 @@ #include #define DM_MSG_PREFIX "multipath round-robin" -#define RR_MIN_IO 1000 -#define RR_VERSION "1.1.0" +#define RR_MIN_IO 1 +#define RR_VERSION "1.2.0" /*----------------------------------------------------------------- * Path-handling code, paths are held in lists @@ -47,44 +47,19 @@ struct selector { struct list_head valid_paths; struct list_head invalid_paths; spinlock_t lock; - struct dm_path * __percpu *current_path; - struct percpu_counter repeat_count; }; -static void set_percpu_current_path(struct selector *s, struct dm_path *path) -{ - int cpu; - - for_each_possible_cpu(cpu) - *per_cpu_ptr(s->current_path, cpu) = path; -} - static struct selector *alloc_selector(void) { struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL); - if (!s) - return NULL; - - INIT_LIST_HEAD(&s->valid_paths); - INIT_LIST_HEAD(&s->invalid_paths); - spin_lock_init(&s->lock); - - s->current_path = alloc_percpu(struct dm_path *); - if (!s->current_path) - goto out_current_path; - set_percpu_current_path(s, NULL); - - if (percpu_counter_init(&s->repeat_count, 0, GFP_KERNEL)) - goto out_repeat_count; + if (s) { + INIT_LIST_HEAD(&s->valid_paths); + INIT_LIST_HEAD(&s->invalid_paths); + spin_lock_init(&s->lock); + } return s; - -out_repeat_count: - free_percpu(s->current_path); -out_current_path: - kfree(s); - return NULL;; } static int rr_create(struct path_selector *ps, unsigned argc, char **argv) @@ -105,8 +80,6 @@ static void rr_destroy(struct path_selector *ps) free_paths(&s->valid_paths); free_paths(&s->invalid_paths); - free_percpu(s->current_path); - percpu_counter_destroy(&s->repeat_count); kfree(s); ps->context = NULL; } @@ -157,6 +130,11 @@ static int rr_add_path(struct path_selector *ps, struct dm_path *path, return -EINVAL; } + if (repeat_count > 1) { + DMWARN_LIMIT("repeat_count > 1 is deprecated, using 1 instead"); + repeat_count = 1; + } + /* allocate the path */ pi = kmalloc(sizeof(*pi), GFP_KERNEL); if (!pi) { @@ -183,9 +161,6 @@ static void rr_fail_path(struct path_selector *ps, struct dm_path *p) struct path_info *pi = p->pscontext; spin_lock_irqsave(&s->lock, flags); - if (p == *this_cpu_ptr(s->current_path)) - set_percpu_current_path(s, NULL); - list_move(&pi->list, &s->invalid_paths); spin_unlock_irqrestore(&s->lock, flags); } @@ -208,29 +183,15 @@ static struct dm_path *rr_select_path(struct path_selector *ps, size_t nr_bytes) unsigned long flags; struct selector *s = ps->context; struct path_info *pi = NULL; - struct dm_path *current_path = NULL; - local_irq_save(flags); - current_path = *this_cpu_ptr(s->current_path); - if (current_path) { - percpu_counter_dec(&s->repeat_count); - if (percpu_counter_read_positive(&s->repeat_count) > 0) { - local_irq_restore(flags); - return current_path; - } - } - - spin_lock(&s->lock); + spin_lock_irqsave(&s->lock, flags); if (!list_empty(&s->valid_paths)) { pi = list_entry(s->valid_paths.next, struct path_info, list); list_move_tail(&pi->list, &s->valid_paths); - percpu_counter_set(&s->repeat_count, pi->repeat_count); - set_percpu_current_path(s, pi->path); - current_path = pi->path; } spin_unlock_irqrestore(&s->lock, flags); - return current_path; + return pi ? pi->path : NULL; } static struct path_selector_type rr_ps = { From 2937e22c2314cdfe31d82e2cc36af4be863be978 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Tue, 28 Feb 2017 19:17:49 +0100 Subject: [PATCH 057/154] dm raid: fix data corruption on reshape request commit d36a19541fe8f392778ac137d60f9be8dfdd8f9d upstream. The lvm2 sequence to manage dm-raid constructor flags that trigger a rebuild or a reshape is defined as: 1) load table with flags (e.g. rebuild/delta_disks/data_offset) 2) clear out the flags in lvm2 metadata 3) store the lvm2 metadata, reload the table to reset the flags previously established during the initial load (1) -- in order to prevent repeatedly requesting a rebuild or a reshape on activation Currently, loading an inactive table with rebuild/reshape flags specified will cause dm-raid to rebuild/reshape on resume and thus start updating the raid metadata (about the progress). When the second table reload, to reset the flags, occurs the constructor accesses the volatile progress state kept in the raid superblocks. Because the active mapping is still processing the rebuild/reshape, that position will be stale by the time the device is resumed. In the reshape case, this causes data corruption by processing already reshaped stripes again. In the rebuild case, it does _not_ cause data corruption but instead involves superfluous rebuilds. Fix by keeping the raid set frozen during the first resume and then allow the rebuild/reshape during the second resume. Fixes: 9dbd1aa3a ("dm raid: add reshaping support to the target") Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-raid.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index af2d79b52484..15daa36fcea6 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3621,6 +3621,8 @@ static int raid_preresume(struct dm_target *ti) return r; } +#define RESUME_STAY_FROZEN_FLAGS (CTR_FLAG_DELTA_DISKS | CTR_FLAG_DATA_OFFSET) + static void raid_resume(struct dm_target *ti) { struct raid_set *rs = ti->private; @@ -3638,7 +3640,15 @@ static void raid_resume(struct dm_target *ti) mddev->ro = 0; mddev->in_sync = 0; - clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + /* + * Keep the RAID set frozen if reshape/rebuild flags are set. + * The RAID set is unfrozen once the next table load/resume, + * which clears the reshape/rebuild flags, occurs. + * This ensures that the constructor for the inactive table + * retrieves an up-to-date reshape_position. + */ + if (!(rs->ctr_flags & RESUME_STAY_FROZEN_FLAGS)) + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); if (mddev->suspended) mddev_resume(mddev); From 27f5ef378d2d56dad3c1a4c93c27fc5ce7c6eda0 Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 14 Dec 2016 18:46:01 -0800 Subject: [PATCH 058/154] scsi: storvsc: use tagged SRB requests if supported by the device commit 3cd6d3d9b1abab8dcdf0800224ce26daac24eea2 upstream. Properly set SRB flags when hosting device supports tagged queuing. This patch improves the performance on Fiber Channel disks. Signed-off-by: Long Li Reviewed-by: K. Y. Srinivasan Signed-off-by: K. Y. Srinivasan Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/storvsc_drv.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 8ccfc9ea874b..c193629ac060 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -136,6 +136,8 @@ struct hv_fc_wwn_packet { #define SRB_FLAGS_PORT_DRIVER_RESERVED 0x0F000000 #define SRB_FLAGS_CLASS_DRIVER_RESERVED 0xF0000000 +#define SP_UNTAGGED ((unsigned char) ~0) +#define SRB_SIMPLE_TAG_REQUEST 0x20 /* * Platform neutral description of a scsi request - @@ -1451,6 +1453,13 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) vm_srb->win8_extension.srb_flags |= SRB_FLAGS_DISABLE_SYNCH_TRANSFER; + if (scmnd->device->tagged_supported) { + vm_srb->win8_extension.srb_flags |= + (SRB_FLAGS_QUEUE_ACTION_ENABLE | SRB_FLAGS_NO_QUEUE_FREEZE); + vm_srb->win8_extension.queue_tag = SP_UNTAGGED; + vm_srb->win8_extension.queue_action = SRB_SIMPLE_TAG_REQUEST; + } + /* Build the SRB */ switch (scmnd->sc_data_direction) { case DMA_TO_DEVICE: From e59693753e08c70a4da76e951ba4279003c1228e Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 14 Dec 2016 18:46:02 -0800 Subject: [PATCH 059/154] scsi: storvsc: properly handle SRB_ERROR when sense message is present commit bba5dc332ec2d3a685cb4dae668c793f6a3713a3 upstream. When sense message is present on error, we should pass along to the upper layer to decide how to deal with the error. This patch fixes connectivity issues with Fiber Channel devices. Signed-off-by: Long Li Reviewed-by: K. Y. Srinivasan Signed-off-by: K. Y. Srinivasan Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/storvsc_drv.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index c193629ac060..5d71a661ee67 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -890,6 +890,13 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, switch (SRB_STATUS(vm_srb->srb_status)) { case SRB_STATUS_ERROR: + /* + * Let upper layer deal with error when + * sense message is present. + */ + + if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) + break; /* * If there is an error; offline the device since all * error recovery strategies would have already been From a50781fe6edabe33ecdef862fd9e7f2f35277de6 Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 14 Dec 2016 18:46:03 -0800 Subject: [PATCH 060/154] scsi: storvsc: properly set residual data length on errors commit 40630f462824ee24bc00d692865c86c3828094e0 upstream. On I/O errors, the Windows driver doesn't set data_transfer_length on error conditions other than SRB_STATUS_DATA_OVERRUN. In these cases we need to set data_transfer_length to 0, indicating there is no data transferred. On SRB_STATUS_DATA_OVERRUN, data_transfer_length is set by the Windows driver to the actual data transferred. Reported-by: Shiva Krishna Signed-off-by: Long Li Reviewed-by: K. Y. Srinivasan Signed-off-by: K. Y. Srinivasan Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/storvsc_drv.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 5d71a661ee67..3f218f5cf29b 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -377,6 +377,7 @@ enum storvsc_request_type { #define SRB_STATUS_SUCCESS 0x01 #define SRB_STATUS_ABORTED 0x02 #define SRB_STATUS_ERROR 0x04 +#define SRB_STATUS_DATA_OVERRUN 0x12 #define SRB_STATUS(status) \ (status & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN)) @@ -962,6 +963,7 @@ static void storvsc_command_completion(struct storvsc_cmd_request *cmd_request, struct scsi_cmnd *scmnd = cmd_request->cmd; struct scsi_sense_hdr sense_hdr; struct vmscsi_request *vm_srb; + u32 data_transfer_length; struct Scsi_Host *host; u32 payload_sz = cmd_request->payload_sz; void *payload = cmd_request->payload; @@ -969,6 +971,7 @@ static void storvsc_command_completion(struct storvsc_cmd_request *cmd_request, host = stor_dev->host; vm_srb = &cmd_request->vstor_packet.vm_srb; + data_transfer_length = vm_srb->data_transfer_length; scmnd->result = vm_srb->scsi_status; @@ -982,13 +985,20 @@ static void storvsc_command_completion(struct storvsc_cmd_request *cmd_request, &sense_hdr); } - if (vm_srb->srb_status != SRB_STATUS_SUCCESS) + if (vm_srb->srb_status != SRB_STATUS_SUCCESS) { storvsc_handle_error(vm_srb, scmnd, host, sense_hdr.asc, sense_hdr.ascq); + /* + * The Windows driver set data_transfer_length on + * SRB_STATUS_DATA_OVERRUN. On other errors, this value + * is untouched. In these cases we set it to 0. + */ + if (vm_srb->srb_status != SRB_STATUS_DATA_OVERRUN) + data_transfer_length = 0; + } scsi_set_resid(scmnd, - cmd_request->payload->range.len - - vm_srb->data_transfer_length); + cmd_request->payload->range.len - data_transfer_length); scmnd->scsi_done(scmnd); From 73f5176ecac265b03dcd8ef0aaed6950e5ecb828 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:21 -0800 Subject: [PATCH 061/154] scsi: aacraid: Reorder Adapter status check commit c421530bf848604e97d0785a03b3fe2c62775083 upstream. The driver currently checks the SELF_TEST_FAILED first and then KERNEL_PANIC next. Under error conditions(boot code failure) both SELF_TEST_FAILED and KERNEL_PANIC can be set at the same time. The driver has the capability to reset the controller on an KERNEL_PANIC, but not on SELF_TEST_FAILED. Fixed by first checking KERNEL_PANIC and then the others. Fixes: e8b12f0fb835223752 ([SCSI] aacraid: Add new code for PMC-Sierra's SRC base controller family) Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/aacraid/src.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 0c453880f214..7b178d765726 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -413,17 +413,24 @@ static int aac_src_check_health(struct aac_dev *dev) { u32 status = src_readl(dev, MUnit.OMR); - /* - * Check to see if the board failed any self tests. - */ - if (unlikely(status & SELF_TEST_FAILED)) - return -1; - /* * Check to see if the board panic'd. */ if (unlikely(status & KERNEL_PANIC)) - return (status >> 16) & 0xFF; + goto err_blink; + + /* + * Check to see if the board failed any self tests. + */ + if (unlikely(status & SELF_TEST_FAILED)) + goto err_out; + + /* + * Check to see if the board failed any self tests. + */ + if (unlikely(status & MONITOR_PANIC)) + goto err_out; + /* * Wait for the adapter to be up and running. */ @@ -433,6 +440,12 @@ static int aac_src_check_health(struct aac_dev *dev) * Everything is OK */ return 0; + +err_out: + return -1; + +err_blink: + return (status > 16) & 0xFF; } /** From e9dc8334d765f7910114921c63a0324272f1f636 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 17 Feb 2017 09:02:45 +0100 Subject: [PATCH 062/154] scsi: use 'scsi_device_from_queue()' for scsi_dh commit 857de6e00778738dc3d61f75acbac35bdc48e533 upstream. The device handler needs to check if a given queue belongs to a scsi device; only then does it make sense to attach a device handler. [mkp: dropped flags] Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_dh.c | 22 ++++------------------ drivers/scsi/scsi_lib.c | 23 +++++++++++++++++++++++ include/scsi/scsi_device.h | 1 + 3 files changed, 28 insertions(+), 18 deletions(-) diff --git a/drivers/scsi/scsi_dh.c b/drivers/scsi/scsi_dh.c index b8d3b97b217a..84addee05be6 100644 --- a/drivers/scsi/scsi_dh.c +++ b/drivers/scsi/scsi_dh.c @@ -219,20 +219,6 @@ int scsi_unregister_device_handler(struct scsi_device_handler *scsi_dh) } EXPORT_SYMBOL_GPL(scsi_unregister_device_handler); -static struct scsi_device *get_sdev_from_queue(struct request_queue *q) -{ - struct scsi_device *sdev; - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - sdev = q->queuedata; - if (!sdev || !get_device(&sdev->sdev_gendev)) - sdev = NULL; - spin_unlock_irqrestore(q->queue_lock, flags); - - return sdev; -} - /* * scsi_dh_activate - activate the path associated with the scsi_device * corresponding to the given request queue. @@ -251,7 +237,7 @@ int scsi_dh_activate(struct request_queue *q, activate_complete fn, void *data) struct scsi_device *sdev; int err = SCSI_DH_NOSYS; - sdev = get_sdev_from_queue(q); + sdev = scsi_device_from_queue(q); if (!sdev) { if (fn) fn(data, err); @@ -298,7 +284,7 @@ int scsi_dh_set_params(struct request_queue *q, const char *params) struct scsi_device *sdev; int err = -SCSI_DH_NOSYS; - sdev = get_sdev_from_queue(q); + sdev = scsi_device_from_queue(q); if (!sdev) return err; @@ -321,7 +307,7 @@ int scsi_dh_attach(struct request_queue *q, const char *name) struct scsi_device_handler *scsi_dh; int err = 0; - sdev = get_sdev_from_queue(q); + sdev = scsi_device_from_queue(q); if (!sdev) return -ENODEV; @@ -359,7 +345,7 @@ const char *scsi_dh_attached_handler_name(struct request_queue *q, gfp_t gfp) struct scsi_device *sdev; const char *handler_name = NULL; - sdev = get_sdev_from_queue(q); + sdev = scsi_device_from_queue(q); if (!sdev) return NULL; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index e64eae4392a4..d8099c7cab00 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2127,6 +2127,29 @@ void scsi_mq_destroy_tags(struct Scsi_Host *shost) blk_mq_free_tag_set(&shost->tag_set); } +/** + * scsi_device_from_queue - return sdev associated with a request_queue + * @q: The request queue to return the sdev from + * + * Return the sdev associated with a request queue or NULL if the + * request_queue does not reference a SCSI device. + */ +struct scsi_device *scsi_device_from_queue(struct request_queue *q) +{ + struct scsi_device *sdev = NULL; + + if (q->mq_ops) { + if (q->mq_ops == &scsi_mq_ops) + sdev = q->queuedata; + } else if (q->request_fn == scsi_request_fn) + sdev = q->queuedata; + if (!sdev || !get_device(&sdev->sdev_gendev)) + sdev = NULL; + + return sdev; +} +EXPORT_SYMBOL_GPL(scsi_device_from_queue); + /* * Function: scsi_block_requests() * diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 8a9563144890..b9ec4939b80c 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -315,6 +315,7 @@ extern void scsi_remove_device(struct scsi_device *); extern int scsi_unregister_device_handler(struct scsi_device_handler *scsi_dh); void scsi_attach_vpd(struct scsi_device *sdev); +extern struct scsi_device *scsi_device_from_queue(struct request_queue *q); extern int scsi_device_get(struct scsi_device *); extern void scsi_device_put(struct scsi_device *); extern struct scsi_device *scsi_device_lookup(struct Scsi_Host *, From 206af3d97f00d5c96de5bfd5a48045d444b3eec4 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 25 Oct 2016 11:37:59 +0200 Subject: [PATCH 063/154] power: reset: at91-poweroff: timely shutdown LPDDR memories commit 0b0408745e7ff24757cbfd571d69026c0ddb803c upstream. LPDDR memories can only handle up to 400 uncontrolled power off. Ensure the proper power off sequence is used before shutting down the platform. Signed-off-by: Alexandre Belloni Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/reset/Kconfig | 2 +- drivers/power/reset/at91-poweroff.c | 54 +++++++++++++++++++++++- drivers/power/reset/at91-sama5d2_shdwc.c | 49 ++++++++++++++++++++- 3 files changed, 102 insertions(+), 3 deletions(-) diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig index c74c3f67b8da..02e46bbcf45d 100644 --- a/drivers/power/reset/Kconfig +++ b/drivers/power/reset/Kconfig @@ -32,7 +32,7 @@ config POWER_RESET_AT91_RESET config POWER_RESET_AT91_SAMA5D2_SHDWC tristate "Atmel AT91 SAMA5D2-Compatible shutdown controller driver" - depends on ARCH_AT91 || COMPILE_TEST + depends on ARCH_AT91 default SOC_SAMA5 help This driver supports the alternate shutdown controller for some Atmel diff --git a/drivers/power/reset/at91-poweroff.c b/drivers/power/reset/at91-poweroff.c index e9e24df35f26..2579f025b90b 100644 --- a/drivers/power/reset/at91-poweroff.c +++ b/drivers/power/reset/at91-poweroff.c @@ -14,9 +14,12 @@ #include #include #include +#include #include #include +#include + #define AT91_SHDW_CR 0x00 /* Shut Down Control Register */ #define AT91_SHDW_SHDW BIT(0) /* Shut Down command */ #define AT91_SHDW_KEY (0xa5 << 24) /* KEY Password */ @@ -50,6 +53,7 @@ static const char *shdwc_wakeup_modes[] = { static void __iomem *at91_shdwc_base; static struct clk *sclk; +static void __iomem *mpddrc_base; static void __init at91_wakeup_status(void) { @@ -73,6 +77,29 @@ static void at91_poweroff(void) writel(AT91_SHDW_KEY | AT91_SHDW_SHDW, at91_shdwc_base + AT91_SHDW_CR); } +static void at91_lpddr_poweroff(void) +{ + asm volatile( + /* Align to cache lines */ + ".balign 32\n\t" + + /* Ensure AT91_SHDW_CR is in the TLB by reading it */ + " ldr r6, [%2, #" __stringify(AT91_SHDW_CR) "]\n\t" + + /* Power down SDRAM0 */ + " str %1, [%0, #" __stringify(AT91_DDRSDRC_LPR) "]\n\t" + /* Shutdown CPU */ + " str %3, [%2, #" __stringify(AT91_SHDW_CR) "]\n\t" + + " b .\n\t" + : + : "r" (mpddrc_base), + "r" cpu_to_le32(AT91_DDRSDRC_LPDDR2_PWOFF), + "r" (at91_shdwc_base), + "r" cpu_to_le32(AT91_SHDW_KEY | AT91_SHDW_SHDW) + : "r0"); +} + static int at91_poweroff_get_wakeup_mode(struct device_node *np) { const char *pm; @@ -124,6 +151,8 @@ static void at91_poweroff_dt_set_wakeup_mode(struct platform_device *pdev) static int __init at91_poweroff_probe(struct platform_device *pdev) { struct resource *res; + struct device_node *np; + u32 ddr_type; int ret; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -150,12 +179,30 @@ static int __init at91_poweroff_probe(struct platform_device *pdev) pm_power_off = at91_poweroff; + np = of_find_compatible_node(NULL, NULL, "atmel,sama5d3-ddramc"); + if (!np) + return 0; + + mpddrc_base = of_iomap(np, 0); + of_node_put(np); + + if (!mpddrc_base) + return 0; + + ddr_type = readl(mpddrc_base + AT91_DDRSDRC_MDR) & AT91_DDRSDRC_MD; + if ((ddr_type == AT91_DDRSDRC_MD_LPDDR2) || + (ddr_type == AT91_DDRSDRC_MD_LPDDR3)) + pm_power_off = at91_lpddr_poweroff; + else + iounmap(mpddrc_base); + return 0; } static int __exit at91_poweroff_remove(struct platform_device *pdev) { - if (pm_power_off == at91_poweroff) + if (pm_power_off == at91_poweroff || + pm_power_off == at91_lpddr_poweroff) pm_power_off = NULL; clk_disable_unprepare(sclk); @@ -163,6 +210,11 @@ static int __exit at91_poweroff_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id at91_ramc_of_match[] = { + { .compatible = "atmel,sama5d3-ddramc", }, + { /* sentinel */ } +}; + static const struct of_device_id at91_poweroff_of_match[] = { { .compatible = "atmel,at91sam9260-shdwc", }, { .compatible = "atmel,at91sam9rl-shdwc", }, diff --git a/drivers/power/reset/at91-sama5d2_shdwc.c b/drivers/power/reset/at91-sama5d2_shdwc.c index 8a5ac9706c9c..90b0b5a70ce5 100644 --- a/drivers/power/reset/at91-sama5d2_shdwc.c +++ b/drivers/power/reset/at91-sama5d2_shdwc.c @@ -22,9 +22,12 @@ #include #include #include +#include #include #include +#include + #define SLOW_CLOCK_FREQ 32768 #define AT91_SHDW_CR 0x00 /* Shut Down Control Register */ @@ -75,6 +78,7 @@ struct shdwc { */ static struct shdwc *at91_shdwc; static struct clk *sclk; +static void __iomem *mpddrc_base; static const unsigned long long sdwc_dbc_period[] = { 0, 3, 32, 512, 4096, 32768, @@ -108,6 +112,29 @@ static void at91_poweroff(void) at91_shdwc->at91_shdwc_base + AT91_SHDW_CR); } +static void at91_lpddr_poweroff(void) +{ + asm volatile( + /* Align to cache lines */ + ".balign 32\n\t" + + /* Ensure AT91_SHDW_CR is in the TLB by reading it */ + " ldr r6, [%2, #" __stringify(AT91_SHDW_CR) "]\n\t" + + /* Power down SDRAM0 */ + " str %1, [%0, #" __stringify(AT91_DDRSDRC_LPR) "]\n\t" + /* Shutdown CPU */ + " str %3, [%2, #" __stringify(AT91_SHDW_CR) "]\n\t" + + " b .\n\t" + : + : "r" (mpddrc_base), + "r" cpu_to_le32(AT91_DDRSDRC_LPDDR2_PWOFF), + "r" (at91_shdwc->at91_shdwc_base), + "r" cpu_to_le32(AT91_SHDW_KEY | AT91_SHDW_SHDW) + : "r0"); +} + static u32 at91_shdwc_debouncer_value(struct platform_device *pdev, u32 in_period_us) { @@ -212,6 +239,8 @@ static int __init at91_shdwc_probe(struct platform_device *pdev) { struct resource *res; const struct of_device_id *match; + struct device_node *np; + u32 ddr_type; int ret; if (!pdev->dev.of_node) @@ -249,6 +278,23 @@ static int __init at91_shdwc_probe(struct platform_device *pdev) pm_power_off = at91_poweroff; + np = of_find_compatible_node(NULL, NULL, "atmel,sama5d3-ddramc"); + if (!np) + return 0; + + mpddrc_base = of_iomap(np, 0); + of_node_put(np); + + if (!mpddrc_base) + return 0; + + ddr_type = readl(mpddrc_base + AT91_DDRSDRC_MDR) & AT91_DDRSDRC_MD; + if ((ddr_type == AT91_DDRSDRC_MD_LPDDR2) || + (ddr_type == AT91_DDRSDRC_MD_LPDDR3)) + pm_power_off = at91_lpddr_poweroff; + else + iounmap(mpddrc_base); + return 0; } @@ -256,7 +302,8 @@ static int __exit at91_shdwc_remove(struct platform_device *pdev) { struct shdwc *shdw = platform_get_drvdata(pdev); - if (pm_power_off == at91_poweroff) + if (pm_power_off == at91_poweroff || + pm_power_off == at91_lpddr_poweroff) pm_power_off = NULL; /* Reset values to disable wake-up features */ From 3de5a92847c1d87d86ee945e6002f2051e02e868 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 3 Nov 2016 10:29:28 -0600 Subject: [PATCH 064/154] Fix: Disable sys_membarrier when nohz_full is enabled commit 907565337ebf998a68cb5c5b2174ce5e5da065eb upstream. Userspace applications should be allowed to expect the membarrier system call with MEMBARRIER_CMD_SHARED command to issue memory barriers on nohz_full CPUs, but synchronize_sched() does not take those into account. Given that we do not want unrelated processes to be able to affect real-time sensitive nohz_full CPUs, simply return ENOSYS when membarrier is invoked on a kernel with enabled nohz_full CPUs. Signed-off-by: Mathieu Desnoyers CC: Josh Triplett CC: Steven Rostedt Signed-off-by: Paul E. McKenney Cc: Frederic Weisbecker Cc: Chris Metcalf Cc: Rik van Riel Acked-by: Lai Jiangshan Reviewed-by: Josh Triplett Signed-off-by: Greg Kroah-Hartman --- kernel/membarrier.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/membarrier.c b/kernel/membarrier.c index 536c727a56e9..9f9284f37f8d 100644 --- a/kernel/membarrier.c +++ b/kernel/membarrier.c @@ -16,6 +16,7 @@ #include #include +#include /* * Bitmask made from a "or" of all commands within enum membarrier_cmd, @@ -51,6 +52,9 @@ */ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags) { + /* MEMBARRIER_CMD_SHARED is not compatible with nohz_full. */ + if (tick_nohz_full_enabled()) + return -ENOSYS; if (unlikely(flags)) return -EINVAL; switch (cmd) { From a9b0c14ba1e4be94036c5725c144818a2d3be191 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 4 Feb 2017 23:14:19 -0500 Subject: [PATCH 065/154] jbd2: don't leak modified metadata buffers on an aborted journal commit e112666b4959b25a8552d63bc564e1059be703e8 upstream. If the journal has been aborted, we shouldn't mark the underlying buffer head as dirty, since that will cause the metadata block to get modified. And if the journal has been aborted, we shouldn't allow this since it will almost certainly lead to a corrupted file system. Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/transaction.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index e1652665bd93..5e659ee08d6a 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1863,7 +1863,9 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) __blist_del_buffer(list, jh); jh->b_jlist = BJ_None; - if (test_clear_buffer_jbddirty(bh)) + if (transaction && is_journal_aborted(transaction->t_journal)) + clear_buffer_jbddirty(bh); + else if (test_clear_buffer_jbddirty(bh)) mark_buffer_dirty(bh); /* Expose it to the VM */ } From 50447afd96145b6d3d7aea412da178dce95f1f9b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 11 Feb 2017 11:40:45 +0800 Subject: [PATCH 066/154] block/loop: fix race between I/O and set_status commit ecdd09597a57251323b0de50e3d45e69298c4a83 upstream. Inside set_status, transfer need to setup again, so we have to drain IO before the transition, otherwise oops may be triggered like the following: divide error: 0000 [#1] SMP KASAN CPU: 0 PID: 2935 Comm: loop7 Not tainted 4.10.0-rc7+ #213 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff88006ba1e840 task.stack: ffff880067338000 RIP: 0010:transfer_xor+0x1d1/0x440 drivers/block/loop.c:110 RSP: 0018:ffff88006733f108 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8800688d7000 RCX: 0000000000000059 RDX: 0000000000000000 RSI: 1ffff1000d743f43 RDI: ffff880068891c08 RBP: ffff88006733f160 R08: ffff8800688d7001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800688d7000 R13: ffff880067b7d000 R14: dffffc0000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88006d000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000006c17e0 CR3: 0000000066e3b000 CR4: 00000000001406f0 Call Trace: lo_do_transfer drivers/block/loop.c:251 [inline] lo_read_transfer drivers/block/loop.c:392 [inline] do_req_filebacked drivers/block/loop.c:541 [inline] loop_handle_cmd drivers/block/loop.c:1677 [inline] loop_queue_work+0xda0/0x49b0 drivers/block/loop.c:1689 kthread_worker_fn+0x4c3/0xa30 kernel/kthread.c:630 kthread+0x326/0x3f0 kernel/kthread.c:227 ret_from_fork+0x31/0x40 arch/x86/entry/entry_64.S:430 Code: 03 83 e2 07 41 29 df 42 0f b6 04 30 4d 8d 44 24 01 38 d0 7f 08 84 c0 0f 85 62 02 00 00 44 89 f8 41 0f b6 48 ff 25 ff 01 00 00 99 7d c8 48 63 d2 48 03 55 d0 48 89 d0 48 89 d7 48 c1 e8 03 83 RIP: transfer_xor+0x1d1/0x440 drivers/block/loop.c:110 RSP: ffff88006733f108 ---[ end trace 0166f7bd3b0c0933 ]--- Reported-by: Dmitry Vyukov Signed-off-by: Ming Lei Tested-by: Dmitry Vyukov Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 4af818766797..ccc3acc0c5ba 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1097,9 +1097,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) return -EINVAL; + /* I/O need to be drained during transfer transition */ + blk_mq_freeze_queue(lo->lo_queue); + err = loop_release_xfer(lo); if (err) - return err; + goto exit; if (info->lo_encrypt_type) { unsigned int type = info->lo_encrypt_type; @@ -1114,12 +1117,14 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) err = loop_init_xfer(lo, xfer, info); if (err) - return err; + goto exit; if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) - if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) - return -EFBIG; + if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { + err = -EFBIG; + goto exit; + } loop_config_discard(lo); @@ -1156,7 +1161,9 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) /* update dio if lo_offset or transfer is changed */ __loop_update_dio(lo, lo->use_dio); - return 0; + exit: + blk_mq_unfreeze_queue(lo->lo_queue); + return err; } static int From 8ca25e39ec2d67df1f69f08c1d4ad0d6310a5c5b Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 1 Mar 2017 10:42:38 -0800 Subject: [PATCH 067/154] loop: fix LO_FLAGS_PARTSCAN hang commit e02898b423802b1f3a3aaa7f16e896da069ba8f7 upstream. loop_reread_partitions() needs to do I/O, but we just froze the queue, so we end up waiting forever. This can easily be reproduced with losetup -P. Fix it by moving the reread to after we unfreeze the queue. Fixes: ecdd09597a57 ("block/loop: fix race between I/O and set_status") Reported-by: Tejun Heo Signed-off-by: Omar Sandoval Reviewed-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ccc3acc0c5ba..24d6cefceb32 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1142,13 +1142,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) (info->lo_flags & LO_FLAGS_AUTOCLEAR)) lo->lo_flags ^= LO_FLAGS_AUTOCLEAR; - if ((info->lo_flags & LO_FLAGS_PARTSCAN) && - !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { - lo->lo_flags |= LO_FLAGS_PARTSCAN; - lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; - loop_reread_partitions(lo, lo->lo_device); - } - lo->lo_encrypt_key_size = info->lo_encrypt_key_size; lo->lo_init[0] = info->lo_init[0]; lo->lo_init[1] = info->lo_init[1]; @@ -1163,6 +1156,14 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) exit: blk_mq_unfreeze_queue(lo->lo_queue); + + if (!err && (info->lo_flags & LO_FLAGS_PARTSCAN) && + !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { + lo->lo_flags |= LO_FLAGS_PARTSCAN; + lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; + loop_reread_partitions(lo, lo->lo_device); + } + return err; } From 72ae476d0401c38c912ad740cadcc7ef302f5ef2 Mon Sep 17 00:00:00 2001 From: Roman Pen Date: Sun, 8 Jan 2017 20:59:35 -0500 Subject: [PATCH 068/154] ext4: Include forgotten start block on fallocate insert range commit 2a9b8cba62c0741109c33a2be700ff3d7703a7c2 upstream. While doing 'insert range' start block should be also shifted right. The bug can be easily reproduced by the following test: ptr = malloc(4096); assert(ptr); fd = open("./ext4.file", O_CREAT | O_TRUNC | O_RDWR, 0600); assert(fd >= 0); rc = fallocate(fd, 0, 0, 8192); assert(rc == 0); for (i = 0; i < 2048; i++) *((unsigned short *)ptr + i) = 0xbeef; rc = pwrite(fd, ptr, 4096, 0); assert(rc == 4096); rc = pwrite(fd, ptr, 4096, 4096); assert(rc == 4096); for (block = 2; block < 1000; block++) { rc = fallocate(fd, FALLOC_FL_INSERT_RANGE, 4096, 4096); assert(rc == 0); for (i = 0; i < 2048; i++) *((unsigned short *)ptr + i) = block; rc = pwrite(fd, ptr, 4096, 4096); assert(rc == 4096); } Because start block is not included in the range the hole appears at the wrong offset (just after the desired offset) and the following pwrite() overwrites already existent block, keeping hole untouched. Simple way to verify wrong behaviour is to check zeroed blocks after the test: $ hexdump ./ext4.file | grep '0000 0000' The root cause of the bug is a wrong range (start, stop], where start should be inclusive, i.e. [start, stop]. This patch fixes the problem by including start into the range. But not to break left shift (range collapse) stop points to the beginning of the a block, not to the end. The other not obvious change is an iterator check on validness in a main loop. Because iterator is unsigned the following corner case should be considered with care: insert a block at 0 offset, when stop variables overflows and never becomes less than start, which is 0. To handle this special case iterator is set to NULL to indicate that end of the loop is reached. Fixes: 331573febb6a2 Signed-off-by: Roman Pen Signed-off-by: Theodore Ts'o Cc: Namjae Jeon Cc: Andreas Dilger Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c930a0110fb4..b4987ea2ca79 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5353,8 +5353,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, if (!extent) goto out; - stop = le32_to_cpu(extent->ee_block) + - ext4_ext_get_actual_len(extent); + stop = le32_to_cpu(extent->ee_block); /* * In case of left shift, Don't start shifting extents until we make @@ -5393,8 +5392,12 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, else iterator = &stop; - /* Its safe to start updating extents */ - while (start < stop) { + /* + * Its safe to start updating extents. Start and stop are unsigned, so + * in case of right shift if extent with 0 block is reached, iterator + * becomes NULL to indicate the end of the loop. + */ + while (iterator && start <= stop) { path = ext4_find_extent(inode, *iterator, &path, 0); if (IS_ERR(path)) return PTR_ERR(path); @@ -5422,8 +5425,11 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, ext4_ext_get_actual_len(extent); } else { extent = EXT_FIRST_EXTENT(path[depth].p_hdr); - *iterator = le32_to_cpu(extent->ee_block) > 0 ? - le32_to_cpu(extent->ee_block) - 1 : 0; + if (le32_to_cpu(extent->ee_block) > 0) + *iterator = le32_to_cpu(extent->ee_block) - 1; + else + /* Beginning is reached, end of the loop */ + iterator = NULL; /* Update path extent in case we need to stop */ while (le32_to_cpu(extent->ee_block) < start) extent++; From e0b53d67291334125531f7f21b2f3df3d6ccc6ec Mon Sep 17 00:00:00 2001 From: Roman Pen Date: Sun, 8 Jan 2017 21:00:35 -0500 Subject: [PATCH 069/154] ext4: do not polute the extents cache while shifting extents commit 03e916fa8b5577d85471452a3d0c5738aa658dae upstream. Inside ext4_ext_shift_extents() function ext4_find_extent() is called without EXT4_EX_NOCACHE flag, which should prevent cache population. This leads to oudated offsets in the extents tree and wrong blocks afterwards. Patch fixes the problem providing EXT4_EX_NOCACHE flag for each ext4_find_extents() call inside ext4_ext_shift_extents function. Fixes: 331573febb6a2 Signed-off-by: Roman Pen Signed-off-by: Theodore Ts'o Cc: Namjae Jeon Cc: Andreas Dilger Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index b4987ea2ca79..9fbf92ca358c 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5344,7 +5344,8 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, ext4_lblk_t stop, *iterator, ex_start, ex_end; /* Let path point to the last extent */ - path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); + path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, + EXT4_EX_NOCACHE); if (IS_ERR(path)) return PTR_ERR(path); @@ -5360,7 +5361,8 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, * sure the hole is big enough to accommodate the shift. */ if (SHIFT == SHIFT_LEFT) { - path = ext4_find_extent(inode, start - 1, &path, 0); + path = ext4_find_extent(inode, start - 1, &path, + EXT4_EX_NOCACHE); if (IS_ERR(path)) return PTR_ERR(path); depth = path->p_depth; @@ -5398,7 +5400,8 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, * becomes NULL to indicate the end of the loop. */ while (iterator && start <= stop) { - path = ext4_find_extent(inode, *iterator, &path, 0); + path = ext4_find_extent(inode, *iterator, &path, + EXT4_EX_NOCACHE); if (IS_ERR(path)) return PTR_ERR(path); depth = path->p_depth; From fc6c2da174edd7a7b760b12c60d432d300e05cca Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 27 Jan 2017 14:34:30 -0500 Subject: [PATCH 070/154] ext4: trim allocation requests to group size commit cd648b8a8fd5071d232242d5ee7ee3c0815776af upstream. If filesystem groups are artifically small (using parameter -g to mkfs.ext4), ext4_mb_normalize_request() can result in a request that is larger than a block group. Trim the request size to not confuse allocation code. Reported-by: "Kirill A. Shutemov" Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 7ae43c59bc79..2e9fc7a61048 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3123,6 +3123,13 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, if (ar->pright && start + size - 1 >= ar->lright) size -= start + size - ar->lright; + /* + * Trim allocation request for filesystems with artificially small + * groups. + */ + if (size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)) + size = EXT4_BLOCKS_PER_GROUP(ac->ac_sb); + end = start + size; /* check we don't cross already preallocated blocks */ From a5a9cf387de6dde4c79b6e7fb6311dbd7ec86f02 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 27 Jan 2017 14:35:38 -0500 Subject: [PATCH 071/154] ext4: fix data corruption in data=journal mode commit 3b136499e906460919f0d21a49db1aaccf0ae963 upstream. ext4_journalled_write_end() did not propely handle all the cases when generic_perform_write() did not copy all the data into the target page and could mark buffers with uninitialized contents as uptodate and dirty leading to possible data corruption (which would be quickly fixed by generic_perform_write() retrying the write but still). Fix the problem by carefully handling the case when the page that is written to is not uptodate. Reported-by: Al Viro Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 33a509c876ee..a48b17cc0cd1 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1379,7 +1379,9 @@ errout: * set the buffer to be dirty, since in data=journalled mode we need * to call ext4_handle_dirty_metadata() instead. */ -static void zero_new_buffers(struct page *page, unsigned from, unsigned to) +static void ext4_journalled_zero_new_buffers(handle_t *handle, + struct page *page, + unsigned from, unsigned to) { unsigned int block_start = 0, block_end; struct buffer_head *head, *bh; @@ -1396,7 +1398,7 @@ static void zero_new_buffers(struct page *page, unsigned from, unsigned to) size = min(to, block_end) - start; zero_user(page, start, size); - set_buffer_uptodate(bh); + write_end_fn(handle, bh); } clear_buffer_new(bh); } @@ -1428,15 +1430,16 @@ static int ext4_journalled_write_end(struct file *file, if (ext4_has_inline_data(inode)) copied = ext4_write_inline_data_end(inode, pos, len, copied, page); - else { - if (copied < len) { - if (!PageUptodate(page)) - copied = 0; - zero_new_buffers(page, from+copied, to); - } - + else if (unlikely(copied < len) && !PageUptodate(page)) { + copied = 0; + ext4_journalled_zero_new_buffers(handle, page, from, to); + } else { + if (unlikely(copied < len)) + ext4_journalled_zero_new_buffers(handle, page, + from + copied, to); ret = ext4_walk_page_buffers(handle, page_buffers(page), from, - to, &partial, write_end_fn); + from + copied, &partial, + write_end_fn); if (!partial) SetPageUptodate(page); } From 68ca0fdac41fa801e1e6c2f99e27a9c24e494532 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 1 Feb 2017 21:07:11 -0500 Subject: [PATCH 072/154] ext4: fix use-after-iput when fscrypt contexts are inconsistent commit dd01b690f8f4b1e414f89e5a9a5326bf720d6652 upstream. In the case where the child's encryption context was inconsistent with its parent directory, we were using inode->i_sb and inode->i_ino after the inode had already been iput(). Fix this by doing the iput() in the correct places. Note: only ext4 had this bug, not f2fs and ubifs. Fixes: d9cdc9033181 ("ext4 crypto: enforce context consistency") Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/namei.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 104f8bfba718..c4a389a6027b 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1616,13 +1616,15 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi !fscrypt_has_permitted_context(dir, inode)) { int nokey = ext4_encrypted_inode(inode) && !fscrypt_has_encryption_key(inode); - iput(inode); - if (nokey) + if (nokey) { + iput(inode); return ERR_PTR(-ENOKEY); + } ext4_warning(inode->i_sb, "Inconsistent encryption contexts: %lu/%lu", (unsigned long) dir->i_ino, (unsigned long) inode->i_ino); + iput(inode); return ERR_PTR(-EPERM); } } From 0b37d0c0c6b35317bfc3aee8cdf94f853d594e5f Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 4 Feb 2017 23:04:00 -0500 Subject: [PATCH 073/154] ext4: fix inline data error paths commit eb5efbcb762aee4b454b04f7115f73ccbcf8f0ef upstream. The write_end() function must always unlock the page and drop its ref count, even on an error. Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inline.c | 9 ++++++++- fs/ext4/inode.c | 20 +++++++++++++++----- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 028329721bbe..37b521ed39df 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -933,8 +933,15 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, struct page *page) { int i_size_changed = 0; + int ret; - copied = ext4_write_inline_data_end(inode, pos, len, copied, page); + ret = ext4_write_inline_data_end(inode, pos, len, copied, page); + if (ret < 0) { + unlock_page(page); + put_page(page); + return ret; + } + copied = ret; /* * No need to use i_size_read() here, the i_size diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a48b17cc0cd1..1d4f5faa04b5 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1324,8 +1324,11 @@ static int ext4_write_end(struct file *file, if (ext4_has_inline_data(inode)) { ret = ext4_write_inline_data_end(inode, pos, len, copied, page); - if (ret < 0) + if (ret < 0) { + unlock_page(page); + put_page(page); goto errout; + } copied = ret; } else copied = block_write_end(file, mapping, pos, @@ -1427,10 +1430,16 @@ static int ext4_journalled_write_end(struct file *file, BUG_ON(!ext4_handle_valid(handle)); - if (ext4_has_inline_data(inode)) - copied = ext4_write_inline_data_end(inode, pos, len, - copied, page); - else if (unlikely(copied < len) && !PageUptodate(page)) { + if (ext4_has_inline_data(inode)) { + ret = ext4_write_inline_data_end(inode, pos, len, + copied, page); + if (ret < 0) { + unlock_page(page); + put_page(page); + goto errout; + } + copied = ret; + } else if (unlikely(copied < len) && !PageUptodate(page)) { copied = 0; ext4_journalled_zero_new_buffers(handle, page, from, to); } else { @@ -1465,6 +1474,7 @@ static int ext4_journalled_write_end(struct file *file, */ ext4_orphan_add(handle, inode); +errout: ret2 = ext4_journal_stop(handle); if (!ret) ret = ret2; From 269bf7b8c5db7544133dcff03bd741f980e1f110 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 4 Feb 2017 23:38:06 -0500 Subject: [PATCH 074/154] ext4: preserve the needs_recovery flag when the journal is aborted commit 97abd7d4b5d9c48ec15c425485f054e1c15e591b upstream. If the journal is aborted, the needs_recovery feature flag should not be removed. Otherwise, it's the journal might not get replayed and this could lead to more data getting lost. Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index bbc316db9495..322119c27348 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -825,6 +825,7 @@ static void ext4_put_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; + int aborted = 0; int i, err; ext4_unregister_li_request(sb); @@ -834,9 +835,10 @@ static void ext4_put_super(struct super_block *sb) destroy_workqueue(sbi->rsv_conversion_wq); if (sbi->s_journal) { + aborted = is_journal_aborted(sbi->s_journal); err = jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; - if (err < 0) + if ((err < 0) && !aborted) ext4_abort(sb, "Couldn't clean up the journal"); } @@ -847,7 +849,7 @@ static void ext4_put_super(struct super_block *sb) ext4_mb_release(sb); ext4_ext_release(sb); - if (!(sb->s_flags & MS_RDONLY)) { + if (!(sb->s_flags & MS_RDONLY) && !aborted) { ext4_clear_feature_journal_needs_recovery(sb); es->s_state = cpu_to_le16(sbi->s_mount_state); } From d6dcec965bc53eb375e50642b3b1abb9b835c2a7 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 5 Feb 2017 01:26:48 -0500 Subject: [PATCH 075/154] ext4: return EROFS if device is r/o and journal replay is needed commit 4753d8a24d4588657bc0a4cd66d4e282dff15c8c upstream. If the file system requires journal recovery, and the device is read-ony, return EROFS to the mount system call. This allows xfstests generic/050 to pass. Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 322119c27348..afe29ba42a4e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3913,7 +3913,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * root first: it may be modified in the journal! */ if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) { - if (ext4_load_journal(sb, es, journal_devnum)) + err = ext4_load_journal(sb, es, journal_devnum); + if (err) goto failed_mount3a; } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && ext4_has_feature_journal_needs_recovery(sb)) { From d6407e10bcf540a67afbeca83e60a4323cba8b33 Mon Sep 17 00:00:00 2001 From: Mathias Svensson Date: Fri, 6 Jan 2017 13:32:39 -0800 Subject: [PATCH 076/154] samples/seccomp: fix 64-bit comparison macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 916cafdc95843fb9af5fd5f83ca499d75473d107 upstream. There were some bugs in the JNE64 and JLT64 comparision macros. This fixes them, improves comments, and cleans up the file while we are at it. Reported-by: Stephen Röttger Signed-off-by: Mathias Svensson Signed-off-by: Kees Cook Signed-off-by: James Morris Signed-off-by: Greg Kroah-Hartman --- samples/seccomp/bpf-helper.h | 139 ++++++++++++++++++++--------------- 1 file changed, 79 insertions(+), 60 deletions(-) diff --git a/samples/seccomp/bpf-helper.h b/samples/seccomp/bpf-helper.h index 38ee70f3cd5b..1d8de9edd858 100644 --- a/samples/seccomp/bpf-helper.h +++ b/samples/seccomp/bpf-helper.h @@ -138,7 +138,7 @@ union arg64 { #define ARG_32(idx) \ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, LO_ARG(idx)) -/* Loads hi into A and lo in X */ +/* Loads lo into M[0] and hi into M[1] and A */ #define ARG_64(idx) \ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, LO_ARG(idx)), \ BPF_STMT(BPF_ST, 0), /* lo -> M[0] */ \ @@ -153,62 +153,14 @@ union arg64 { BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (value), 1, 0), \ jt -/* Checks the lo, then swaps to check the hi. A=lo,X=hi */ -#define JEQ64(lo, hi, jt) \ - BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \ - BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ - BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (lo), 0, 2), \ - BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ - jt, \ - BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ - -#define JNE64(lo, hi, jt) \ - BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 5, 0), \ - BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ - BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (lo), 2, 0), \ - BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ - jt, \ - BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ - #define JA32(value, jt) \ BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, (value), 0, 1), \ jt -#define JA64(lo, hi, jt) \ - BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, (hi), 3, 0), \ - BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ - BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, (lo), 0, 2), \ - BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ - jt, \ - BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ - #define JGE32(value, jt) \ BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (value), 0, 1), \ jt -#define JLT32(value, jt) \ - BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (value), 1, 0), \ - jt - -/* Shortcut checking if hi > arg.hi. */ -#define JGE64(lo, hi, jt) \ - BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (hi), 4, 0), \ - BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \ - BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ - BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (lo), 0, 2), \ - BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ - jt, \ - BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ - -#define JLT64(lo, hi, jt) \ - BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (hi), 0, 4), \ - BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \ - BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ - BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (lo), 2, 0), \ - BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ - jt, \ - BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ - #define JGT32(value, jt) \ BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (value), 0, 1), \ jt @@ -217,24 +169,91 @@ union arg64 { BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (value), 1, 0), \ jt -/* Check hi > args.hi first, then do the GE checking */ -#define JGT64(lo, hi, jt) \ - BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (hi), 4, 0), \ +#define JLT32(value, jt) \ + BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (value), 1, 0), \ + jt + +/* + * All the JXX64 checks assume lo is saved in M[0] and hi is saved in both + * A and M[1]. This invariant is kept by restoring A if necessary. + */ +#define JEQ64(lo, hi, jt) \ + /* if (hi != arg.hi) goto NOMATCH; */ \ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \ BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ - BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (lo), 0, 2), \ - BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ + /* if (lo != arg.lo) goto NOMATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (lo), 0, 2), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), \ jt, \ - BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ + BPF_STMT(BPF_LD+BPF_MEM, 1) + +#define JNE64(lo, hi, jt) \ + /* if (hi != arg.hi) goto MATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 3), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), \ + /* if (lo != arg.lo) goto MATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (lo), 2, 0), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), \ + jt, \ + BPF_STMT(BPF_LD+BPF_MEM, 1) + +#define JA64(lo, hi, jt) \ + /* if (hi & arg.hi) goto MATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, (hi), 3, 0), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), \ + /* if (lo & arg.lo) goto MATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, (lo), 0, 2), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), \ + jt, \ + BPF_STMT(BPF_LD+BPF_MEM, 1) + +#define JGE64(lo, hi, jt) \ + /* if (hi > arg.hi) goto MATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (hi), 4, 0), \ + /* if (hi != arg.hi) goto NOMATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), \ + /* if (lo >= arg.lo) goto MATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (lo), 0, 2), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), \ + jt, \ + BPF_STMT(BPF_LD+BPF_MEM, 1) + +#define JGT64(lo, hi, jt) \ + /* if (hi > arg.hi) goto MATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (hi), 4, 0), \ + /* if (hi != arg.hi) goto NOMATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), \ + /* if (lo > arg.lo) goto MATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (lo), 0, 2), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), \ + jt, \ + BPF_STMT(BPF_LD+BPF_MEM, 1) #define JLE64(lo, hi, jt) \ - BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (hi), 6, 0), \ - BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 3), \ - BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ + /* if (hi < arg.hi) goto MATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (hi), 0, 4), \ + /* if (hi != arg.hi) goto NOMATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), \ + /* if (lo <= arg.lo) goto MATCH; */ \ BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (lo), 2, 0), \ - BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ + BPF_STMT(BPF_LD+BPF_MEM, 1), \ jt, \ - BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ + BPF_STMT(BPF_LD+BPF_MEM, 1) + +#define JLT64(lo, hi, jt) \ + /* if (hi < arg.hi) goto MATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (hi), 0, 4), \ + /* if (hi != arg.hi) goto NOMATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), \ + /* if (lo < arg.lo) goto MATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (lo), 2, 0), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), \ + jt, \ + BPF_STMT(BPF_LD+BPF_MEM, 1) #define LOAD_SYSCALL_NR \ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ From d2a8cd3eee0fe9b14fe3a72c02cffa7df31a2383 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Wed, 8 Feb 2017 00:41:45 +0200 Subject: [PATCH 077/154] mei: remove support for broken parallel read commit cb97fbbcac15982406e0c74cd5512a8b6fcf10b3 upstream. Parallel reads from multiple threads on a file descriptor are not well defined and racy. It is safer to return to original behavior and simply fail the additional read. The solution is to remove request for next read credit. Fixes: ff1586a7ea57 ("mei: enqueue consecutive reads") Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/main.c | 48 ++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c index fa50635512e8..41f318631c6d 100644 --- a/drivers/misc/mei/main.c +++ b/drivers/misc/mei/main.c @@ -182,32 +182,36 @@ static ssize_t mei_read(struct file *file, char __user *ubuf, goto out; } - if (rets == -EBUSY && - !mei_cl_enqueue_ctrl_wr_cb(cl, length, MEI_FOP_READ, file)) { - rets = -ENOMEM; + +again: + mutex_unlock(&dev->device_lock); + if (wait_event_interruptible(cl->rx_wait, + !list_empty(&cl->rd_completed) || + !mei_cl_is_connected(cl))) { + if (signal_pending(current)) + return -EINTR; + return -ERESTARTSYS; + } + mutex_lock(&dev->device_lock); + + if (!mei_cl_is_connected(cl)) { + rets = -ENODEV; goto out; } - do { - mutex_unlock(&dev->device_lock); + cb = mei_cl_read_cb(cl, file); + if (!cb) { + /* + * For amthif all the waiters are woken up, + * but only fp with matching cb->fp get the cb, + * the others have to return to wait on read. + */ + if (cl == &dev->iamthif_cl) + goto again; - if (wait_event_interruptible(cl->rx_wait, - (!list_empty(&cl->rd_completed)) || - (!mei_cl_is_connected(cl)))) { - - if (signal_pending(current)) - return -EINTR; - return -ERESTARTSYS; - } - - mutex_lock(&dev->device_lock); - if (!mei_cl_is_connected(cl)) { - rets = -ENODEV; - goto out; - } - - cb = mei_cl_read_cb(cl, file); - } while (!cb); + rets = 0; + goto out; + } copy_buffer: /* now copy the data to user space */ From bac7f7135d51144ff193002b296b8bdf06c9855f Mon Sep 17 00:00:00 2001 From: Tamizh chelvam Date: Thu, 2 Feb 2017 08:32:18 +0200 Subject: [PATCH 078/154] ath10k: fix boot failure in UTF mode/testmode commit cb4281528b62207918b1e95827cad7527aa4dbaa upstream. Rx filter reset and the dynamic tx switch mode (EXT_RESOURCE_CFG) configuration are causing the following errors when UTF firmware is loaded to the target. Error message 1: [ 598.015629] ath10k_pci 0001:01:00.0: failed to ping firmware: -110 [ 598.020828] ath10k_pci 0001:01:00.0: failed to reset rx filter: -110 [ 598.141556] ath10k_pci 0001:01:00.0: failed to start core (testmode): -110 Error message 2: [ 668.615839] ath10k_ahb a000000.wifi: failed to send ext resource cfg command : -95 [ 668.618902] ath10k_ahb a000000.wifi: failed to start core (testmode): -95 Avoiding these configurations while bringing the target in testmode is solving the problem. Signed-off-by: Tamizh chelvam Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/core.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 0c4532227f25..972b5e224d5d 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -1901,7 +1901,8 @@ int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode, ath10k_dbg(ar, ATH10K_DBG_BOOT, "firmware %s booted\n", ar->hw->wiphy->fw_version); - if (test_bit(WMI_SERVICE_EXT_RES_CFG_SUPPORT, ar->wmi.svc_map)) { + if (test_bit(WMI_SERVICE_EXT_RES_CFG_SUPPORT, ar->wmi.svc_map) && + mode == ATH10K_FIRMWARE_MODE_NORMAL) { val = 0; if (ath10k_peer_stats_enabled(ar)) val = WMI_10_4_PEER_STATS; @@ -1954,10 +1955,13 @@ int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode, * possible to implicitly make it correct by creating a dummy vdev and * then deleting it. */ - status = ath10k_core_reset_rx_filter(ar); - if (status) { - ath10k_err(ar, "failed to reset rx filter: %d\n", status); - goto err_hif_stop; + if (mode == ATH10K_FIRMWARE_MODE_NORMAL) { + status = ath10k_core_reset_rx_filter(ar); + if (status) { + ath10k_err(ar, + "failed to reset rx filter: %d\n", status); + goto err_hif_stop; + } } /* If firmware indicates Full Rx Reorder support it must be used in a From 93c1f1db1a7421545e9aae9fb6091668ca570f2b Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 11 Jan 2017 16:32:13 +0200 Subject: [PATCH 079/154] ath5k: drop bogus warning on drv_set_key with unsupported cipher commit a70e1d6fd6b5e1a81fa6171600942bee34f5128f upstream. Simply return -EOPNOTSUPP instead. Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath5k/mac80211-ops.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath5k/mac80211-ops.c b/drivers/net/wireless/ath/ath5k/mac80211-ops.c index dc44cfef7517..16e052d02c94 100644 --- a/drivers/net/wireless/ath/ath5k/mac80211-ops.c +++ b/drivers/net/wireless/ath/ath5k/mac80211-ops.c @@ -502,8 +502,7 @@ ath5k_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, break; return -EOPNOTSUPP; default: - WARN_ON(1); - return -EINVAL; + return -EOPNOTSUPP; } mutex_lock(&ah->lock); From c41cae06bf6027d2f77ebbb12bc17da1cd6e131f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 2 Feb 2017 10:14:52 +0100 Subject: [PATCH 080/154] ath9k: fix race condition in enabling/disabling IRQs commit 3a5e969bb2f6692a256352649355d56d018d6b88 upstream. The code currently relies on refcounting to disable IRQs from within the IRQ handler and re-enabling them again after the tasklet has run. However, due to race conditions sometimes the IRQ handler might be called twice, or the tasklet may not run at all (if interrupted in the middle of a reset). This can cause nasty imbalances in the irq-disable refcount which will get the driver permanently stuck until the entire radio has been stopped and started again (ath_reset will not recover from this). Instead of using this fragile logic, change the code to ensure that running the irq handler during tasklet processing is safe, and leave the refcount untouched. Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/ath9k.h | 1 + drivers/net/wireless/ath/ath9k/init.c | 1 + drivers/net/wireless/ath/ath9k/mac.c | 44 ++++++++++++++++++++------ drivers/net/wireless/ath/ath9k/mac.h | 1 + drivers/net/wireless/ath/ath9k/main.c | 27 +++++++--------- 5 files changed, 48 insertions(+), 26 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h index 26fc8ecfe8c4..a7316710a902 100644 --- a/drivers/net/wireless/ath/ath9k/ath9k.h +++ b/drivers/net/wireless/ath/ath9k/ath9k.h @@ -959,6 +959,7 @@ struct ath_softc { struct survey_info *cur_survey; struct survey_info survey[ATH9K_NUM_CHANNELS]; + spinlock_t intr_lock; struct tasklet_struct intr_tq; struct tasklet_struct bcon_tasklet; struct ath_hw *sc_ah; diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index cfa3fe82ade3..297d4bbc5c05 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -626,6 +626,7 @@ static int ath9k_init_softc(u16 devid, struct ath_softc *sc, common->bt_ant_diversity = 1; spin_lock_init(&common->cc_lock); + spin_lock_init(&sc->intr_lock); spin_lock_init(&sc->sc_serial_rw); spin_lock_init(&sc->sc_pm_lock); spin_lock_init(&sc->chan_lock); diff --git a/drivers/net/wireless/ath/ath9k/mac.c b/drivers/net/wireless/ath/ath9k/mac.c index bba85d1a6cd1..d937c39b3a0b 100644 --- a/drivers/net/wireless/ath/ath9k/mac.c +++ b/drivers/net/wireless/ath/ath9k/mac.c @@ -805,21 +805,12 @@ void ath9k_hw_disable_interrupts(struct ath_hw *ah) } EXPORT_SYMBOL(ath9k_hw_disable_interrupts); -void ath9k_hw_enable_interrupts(struct ath_hw *ah) +static void __ath9k_hw_enable_interrupts(struct ath_hw *ah) { struct ath_common *common = ath9k_hw_common(ah); u32 sync_default = AR_INTR_SYNC_DEFAULT; u32 async_mask; - if (!(ah->imask & ATH9K_INT_GLOBAL)) - return; - - if (!atomic_inc_and_test(&ah->intr_ref_cnt)) { - ath_dbg(common, INTERRUPT, "Do not enable IER ref count %d\n", - atomic_read(&ah->intr_ref_cnt)); - return; - } - if (AR_SREV_9340(ah) || AR_SREV_9550(ah) || AR_SREV_9531(ah) || AR_SREV_9561(ah)) sync_default &= ~AR_INTR_SYNC_HOST1_FATAL; @@ -841,6 +832,39 @@ void ath9k_hw_enable_interrupts(struct ath_hw *ah) ath_dbg(common, INTERRUPT, "AR_IMR 0x%x IER 0x%x\n", REG_READ(ah, AR_IMR), REG_READ(ah, AR_IER)); } + +void ath9k_hw_resume_interrupts(struct ath_hw *ah) +{ + struct ath_common *common = ath9k_hw_common(ah); + + if (!(ah->imask & ATH9K_INT_GLOBAL)) + return; + + if (atomic_read(&ah->intr_ref_cnt) != 0) { + ath_dbg(common, INTERRUPT, "Do not enable IER ref count %d\n", + atomic_read(&ah->intr_ref_cnt)); + return; + } + + __ath9k_hw_enable_interrupts(ah); +} +EXPORT_SYMBOL(ath9k_hw_resume_interrupts); + +void ath9k_hw_enable_interrupts(struct ath_hw *ah) +{ + struct ath_common *common = ath9k_hw_common(ah); + + if (!(ah->imask & ATH9K_INT_GLOBAL)) + return; + + if (!atomic_inc_and_test(&ah->intr_ref_cnt)) { + ath_dbg(common, INTERRUPT, "Do not enable IER ref count %d\n", + atomic_read(&ah->intr_ref_cnt)); + return; + } + + __ath9k_hw_enable_interrupts(ah); +} EXPORT_SYMBOL(ath9k_hw_enable_interrupts); void ath9k_hw_set_interrupts(struct ath_hw *ah) diff --git a/drivers/net/wireless/ath/ath9k/mac.h b/drivers/net/wireless/ath/ath9k/mac.h index 3bab01435a86..770fc11b41d1 100644 --- a/drivers/net/wireless/ath/ath9k/mac.h +++ b/drivers/net/wireless/ath/ath9k/mac.h @@ -744,6 +744,7 @@ void ath9k_hw_set_interrupts(struct ath_hw *ah); void ath9k_hw_enable_interrupts(struct ath_hw *ah); void ath9k_hw_disable_interrupts(struct ath_hw *ah); void ath9k_hw_kill_interrupts(struct ath_hw *ah); +void ath9k_hw_resume_interrupts(struct ath_hw *ah); void ar9002_hw_attach_mac_ops(struct ath_hw *ah); diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index e9f32b52fc8c..b868f02ced89 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -373,21 +373,20 @@ void ath9k_tasklet(unsigned long data) struct ath_common *common = ath9k_hw_common(ah); enum ath_reset_type type; unsigned long flags; - u32 status = sc->intrstatus; + u32 status; u32 rxmask; + spin_lock_irqsave(&sc->intr_lock, flags); + status = sc->intrstatus; + sc->intrstatus = 0; + spin_unlock_irqrestore(&sc->intr_lock, flags); + ath9k_ps_wakeup(sc); spin_lock(&sc->sc_pcu_lock); if (status & ATH9K_INT_FATAL) { type = RESET_TYPE_FATAL_INT; ath9k_queue_reset(sc, type); - - /* - * Increment the ref. counter here so that - * interrupts are enabled in the reset routine. - */ - atomic_inc(&ah->intr_ref_cnt); ath_dbg(common, RESET, "FATAL: Skipping interrupts\n"); goto out; } @@ -403,11 +402,6 @@ void ath9k_tasklet(unsigned long data) type = RESET_TYPE_BB_WATCHDOG; ath9k_queue_reset(sc, type); - /* - * Increment the ref. counter here so that - * interrupts are enabled in the reset routine. - */ - atomic_inc(&ah->intr_ref_cnt); ath_dbg(common, RESET, "BB_WATCHDOG: Skipping interrupts\n"); goto out; @@ -420,7 +414,6 @@ void ath9k_tasklet(unsigned long data) if ((sc->gtt_cnt >= MAX_GTT_CNT) && !ath9k_hw_check_alive(ah)) { type = RESET_TYPE_TX_GTT; ath9k_queue_reset(sc, type); - atomic_inc(&ah->intr_ref_cnt); ath_dbg(common, RESET, "GTT: Skipping interrupts\n"); goto out; @@ -477,7 +470,7 @@ void ath9k_tasklet(unsigned long data) ath9k_btcoex_handle_interrupt(sc, status); /* re-enable hardware interrupt */ - ath9k_hw_enable_interrupts(ah); + ath9k_hw_resume_interrupts(ah); out: spin_unlock(&sc->sc_pcu_lock); ath9k_ps_restore(sc); @@ -541,7 +534,9 @@ irqreturn_t ath_isr(int irq, void *dev) return IRQ_NONE; /* Cache the status */ - sc->intrstatus = status; + spin_lock(&sc->intr_lock); + sc->intrstatus |= status; + spin_unlock(&sc->intr_lock); if (status & SCHED_INTR) sched = true; @@ -587,7 +582,7 @@ chip_reset: if (sched) { /* turn off every interrupt */ - ath9k_hw_disable_interrupts(ah); + ath9k_hw_kill_interrupts(ah); tasklet_schedule(&sc->intr_tq); } From 787fd7a6c8ede68e55321c5bfbea9f1ca89a8148 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Tue, 14 Feb 2017 20:10:30 +0100 Subject: [PATCH 081/154] ath9k: use correct OTP register offsets for the AR9340 and AR9550 commit c9f1e32600816d695f817477d56490bfc2ba43c6 upstream. This patch fixes the OTP register definitions for the AR934x and AR9550 WMAC SoC. Previously, the ath9k driver was unable to initialize the integrated WMAC on an Aerohive AP121: | ath: phy0: timeout (1000 us) on reg 0x30018: 0xbadc0ffe & 0x00000007 != 0x00000004 | ath: phy0: timeout (1000 us) on reg 0x30018: 0xbadc0ffe & 0x00000007 != 0x00000004 | ath: phy0: Unable to initialize hardware; initialization status: -5 | ath9k ar934x_wmac: failed to initialize device | ath9k: probe of ar934x_wmac failed with error -5 It turns out that the AR9300_OTP_STATUS and AR9300_OTP_DATA definitions contain a typo. Cc: Gabor Juhos Fixes: add295a4afbdf5852d0 "ath9k: use correct OTP register offsets for AR9550" Signed-off-by: Christian Lamparter Signed-off-by: Chris Blake Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/ar9003_eeprom.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.h b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.h index 107bcfbbe0fb..cb37bf01920e 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.h +++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.h @@ -73,13 +73,13 @@ #define AR9300_OTP_BASE \ ((AR_SREV_9340(ah) || AR_SREV_9550(ah)) ? 0x30000 : 0x14000) #define AR9300_OTP_STATUS \ - ((AR_SREV_9340(ah) || AR_SREV_9550(ah)) ? 0x30018 : 0x15f18) + ((AR_SREV_9340(ah) || AR_SREV_9550(ah)) ? 0x31018 : 0x15f18) #define AR9300_OTP_STATUS_TYPE 0x7 #define AR9300_OTP_STATUS_VALID 0x4 #define AR9300_OTP_STATUS_ACCESS_BUSY 0x2 #define AR9300_OTP_STATUS_SM_BUSY 0x1 #define AR9300_OTP_READ_DATA \ - ((AR_SREV_9340(ah) || AR_SREV_9550(ah)) ? 0x3001c : 0x15f1c) + ((AR_SREV_9340(ah) || AR_SREV_9550(ah)) ? 0x3101c : 0x15f1c) enum targetPowerHTRates { HT_TARGET_RATE_0_8_16, From bc5338a4fd3d9042107ce3fe40792c1c4839ebab Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Fri, 10 Feb 2017 15:18:46 -0600 Subject: [PATCH 082/154] PCI: hv: Fix wslot_to_devfn() to fix warnings on device removal commit 60e2e2fbafdd1285ae1b4ad39ded41603e0c74d0 upstream. The devfn of 00:02.0 is 0x10. devfn_to_wslot(0x10) == 0x2, and wslot_to_devfn(0x2) should be 0x10, while it's 0x2 in the current code. Due to this, hv_eject_device_work() -> pci_get_domain_bus_and_slot() returns NULL and pci_stop_and_remove_bus_device() is not called. Later when the real device driver's .remove() is invoked by hv_pci_remove() -> pci_stop_root_bus(), some warnings can be noticed because the VM has lost the access to the underlying device at that time. Signed-off-by: Jake Oshins Signed-off-by: Dexuan Cui Signed-off-by: Bjorn Helgaas Acked-by: Haiyang Zhang CC: K. Y. Srinivasan CC: Stephen Hemminger Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pci-hyperv.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c index 763ff8745828..61fc349c96d4 100644 --- a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -130,7 +130,8 @@ union pci_version { */ union win_slot_encoding { struct { - u32 func:8; + u32 dev:5; + u32 func:3; u32 reserved:24; } bits; u32 slot; @@ -483,7 +484,8 @@ static u32 devfn_to_wslot(int devfn) union win_slot_encoding wslot; wslot.slot = 0; - wslot.bits.func = PCI_SLOT(devfn) | (PCI_FUNC(devfn) << 5); + wslot.bits.dev = PCI_SLOT(devfn); + wslot.bits.func = PCI_FUNC(devfn); return wslot.slot; } @@ -501,7 +503,7 @@ static int wslot_to_devfn(u32 wslot) union win_slot_encoding slot_no; slot_no.slot = wslot; - return PCI_DEVFN(0, slot_no.bits.func); + return PCI_DEVFN(slot_no.bits.dev, slot_no.bits.func); } /* From 1fb738a3dc1304250c755e5e31715137c1c44c50 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 16 Feb 2017 10:22:34 +1100 Subject: [PATCH 083/154] pci/hotplug/pnv-php: Disable MSI and PCI device properly commit 49f4b08e61547a5ccd2db551d994c4503efe5666 upstream. pnv_php_disable_irq() can be called in two paths: Bailing path in pnv_php_enable_irq() or releasing slot. The MSI (or MSIx) interrupts is disabled unconditionally in pnv_php_disable_irq(). It's wrong because that might be enabled by drivers other than pnv-php. This disables MSI (or MSIx) interrupts and the PCI device only if it was enabled by pnv-php. In the error path of pnv_php_enable_irq(), we rely on the newly added parameter @disable_device. In the path of releasing slot, @pnv_php->irq is checked. Fixes: 360aebd85a4c ("drivers/pci/hotplug: Support surprise hotplug in powernv driver") Signed-off-by: Gavin Shan Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/pci/hotplug/pnv_php.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 56efaf72d08e..acb2be0c8c2c 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -35,9 +35,11 @@ static void pnv_php_register(struct device_node *dn); static void pnv_php_unregister_one(struct device_node *dn); static void pnv_php_unregister(struct device_node *dn); -static void pnv_php_disable_irq(struct pnv_php_slot *php_slot) +static void pnv_php_disable_irq(struct pnv_php_slot *php_slot, + bool disable_device) { struct pci_dev *pdev = php_slot->pdev; + int irq = php_slot->irq; u16 ctrl; if (php_slot->irq > 0) { @@ -56,10 +58,14 @@ static void pnv_php_disable_irq(struct pnv_php_slot *php_slot) php_slot->wq = NULL; } - if (pdev->msix_enabled) - pci_disable_msix(pdev); - else if (pdev->msi_enabled) - pci_disable_msi(pdev); + if (disable_device || irq > 0) { + if (pdev->msix_enabled) + pci_disable_msix(pdev); + else if (pdev->msi_enabled) + pci_disable_msi(pdev); + + pci_disable_device(pdev); + } } static void pnv_php_free_slot(struct kref *kref) @@ -68,7 +74,7 @@ static void pnv_php_free_slot(struct kref *kref) struct pnv_php_slot, kref); WARN_ON(!list_empty(&php_slot->children)); - pnv_php_disable_irq(php_slot); + pnv_php_disable_irq(php_slot, false); kfree(php_slot->name); kfree(php_slot); } @@ -759,7 +765,7 @@ static void pnv_php_init_irq(struct pnv_php_slot *php_slot, int irq) php_slot->wq = alloc_workqueue("pciehp-%s", 0, 0, php_slot->name); if (!php_slot->wq) { dev_warn(&pdev->dev, "Cannot alloc workqueue\n"); - pnv_php_disable_irq(php_slot); + pnv_php_disable_irq(php_slot, true); return; } @@ -772,7 +778,7 @@ static void pnv_php_init_irq(struct pnv_php_slot *php_slot, int irq) ret = request_irq(irq, pnv_php_interrupt, IRQF_SHARED, php_slot->name, php_slot); if (ret) { - pnv_php_disable_irq(php_slot); + pnv_php_disable_irq(php_slot, true); dev_warn(&pdev->dev, "Error %d enabling IRQ %d\n", ret, irq); return; } From 730b1b20c6cdc63f4ce32efc5a494fc9b141b854 Mon Sep 17 00:00:00 2001 From: Ley Foon Tan Date: Tue, 28 Feb 2017 18:37:16 +0800 Subject: [PATCH 084/154] PCI: altera: Fix TLP_CFG_DW0 for TLP write commit 2a7275a3d867b228216886aae35e1f64291180b1 upstream. eb5767122feb ("PCI: altera: Simplify TLB_CFG_DW0 usage") used TLP_FMTTYPE_CFGRD* (instead of TLP_FMTTYPE_CFGWR*) for TLP writes, which causes writing to configuration space to fail. Fix it by using correct FMTTYPE for write operation. Fixes: eb5767122feb ("PCI: altera: Simplify TLB_CFG_DW0 usage") Signed-off-by: Ley Foon Tan Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pcie-altera.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/pci/host/pcie-altera.c b/drivers/pci/host/pcie-altera.c index b0ac4dfafa0b..f2907e7adb5d 100644 --- a/drivers/pci/host/pcie-altera.c +++ b/drivers/pci/host/pcie-altera.c @@ -57,10 +57,14 @@ #define TLP_WRITE_TAG 0x10 #define RP_DEVFN 0 #define TLP_REQ_ID(bus, devfn) (((bus) << 8) | (devfn)) -#define TLP_CFG_DW0(pcie, bus) \ +#define TLP_CFGRD_DW0(pcie, bus) \ ((((bus == pcie->root_bus_nr) ? TLP_FMTTYPE_CFGRD0 \ : TLP_FMTTYPE_CFGRD1) << 24) | \ TLP_PAYLOAD_SIZE) +#define TLP_CFGWR_DW0(pcie, bus) \ + ((((bus == pcie->root_bus_nr) ? TLP_FMTTYPE_CFGWR0 \ + : TLP_FMTTYPE_CFGWR1) << 24) | \ + TLP_PAYLOAD_SIZE) #define TLP_CFG_DW1(pcie, tag, be) \ (((TLP_REQ_ID(pcie->root_bus_nr, RP_DEVFN)) << 16) | (tag << 8) | (be)) #define TLP_CFG_DW2(bus, devfn, offset) \ @@ -222,7 +226,7 @@ static int tlp_cfg_dword_read(struct altera_pcie *pcie, u8 bus, u32 devfn, { u32 headers[TLP_HDR_SIZE]; - headers[0] = TLP_CFG_DW0(pcie, bus); + headers[0] = TLP_CFGRD_DW0(pcie, bus); headers[1] = TLP_CFG_DW1(pcie, TLP_READ_TAG, byte_en); headers[2] = TLP_CFG_DW2(bus, devfn, where); @@ -237,7 +241,7 @@ static int tlp_cfg_dword_write(struct altera_pcie *pcie, u8 bus, u32 devfn, u32 headers[TLP_HDR_SIZE]; int ret; - headers[0] = TLP_CFG_DW0(pcie, bus); + headers[0] = TLP_CFGWR_DW0(pcie, bus); headers[1] = TLP_CFG_DW1(pcie, TLP_WRITE_TAG, byte_en); headers[2] = TLP_CFG_DW2(bus, devfn, where); From 65013a93b6c335bd166c911d73c47ff4bcbdb8be Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 7 Dec 2016 01:16:24 -0800 Subject: [PATCH 085/154] Drivers: hv: vmbus: Raise retry/wait limits in vmbus_post_msg() commit c0bb03924f1a80e7f65900e36c8e6b3dc167c5f8 upstream. DoS protection conditions were altered in WS2016 and now it's easy to get -EAGAIN returned from vmbus_post_msg() (e.g. when we try changing MTU on a netvsc device in a loop). All vmbus_post_msg() callers don't retry the operation and we usually end up with a non-functional device or crash. While host's DoS protection conditions are unknown to me my tests show that it can take up to 10 seconds before the message is sent so doing udelay() is not an option, we really need to sleep. Almost all vmbus_post_msg() callers are ready to sleep but there is one special case: vmbus_initiate_unload() which can be called from interrupt/NMI context and we can't sleep there. I'm also not sure about the lonely vmbus_send_tl_connect_request() which has no in-tree users but its external users are most likely waiting for the host to reply so sleeping there is also appropriate. Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 17 +++++++++-------- drivers/hv/channel_mgmt.c | 10 ++++++---- drivers/hv/connection.c | 17 ++++++++++++----- drivers/hv/hyperv_vmbus.h | 2 +- 4 files changed, 28 insertions(+), 18 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 5fb4c6d9209b..d5b8d9fd50bb 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -181,7 +181,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); ret = vmbus_post_msg(open_msg, - sizeof(struct vmbus_channel_open_channel)); + sizeof(struct vmbus_channel_open_channel), true); if (ret != 0) { err = ret; @@ -233,7 +233,7 @@ int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id, conn_msg.guest_endpoint_id = *shv_guest_servie_id; conn_msg.host_service_id = *shv_host_servie_id; - return vmbus_post_msg(&conn_msg, sizeof(conn_msg)); + return vmbus_post_msg(&conn_msg, sizeof(conn_msg), true); } EXPORT_SYMBOL_GPL(vmbus_send_tl_connect_request); @@ -419,7 +419,7 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); ret = vmbus_post_msg(gpadlmsg, msginfo->msgsize - - sizeof(*msginfo)); + sizeof(*msginfo), true); if (ret != 0) goto cleanup; @@ -433,8 +433,8 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, gpadl_body->gpadl = next_gpadl_handle; ret = vmbus_post_msg(gpadl_body, - submsginfo->msgsize - - sizeof(*submsginfo)); + submsginfo->msgsize - sizeof(*submsginfo), + true); if (ret != 0) goto cleanup; @@ -485,8 +485,8 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle) list_add_tail(&info->msglistentry, &vmbus_connection.chn_msg_list); spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); - ret = vmbus_post_msg(msg, - sizeof(struct vmbus_channel_gpadl_teardown)); + ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_gpadl_teardown), + true); if (ret) goto post_msg_err; @@ -557,7 +557,8 @@ static int vmbus_close_internal(struct vmbus_channel *channel) msg->header.msgtype = CHANNELMSG_CLOSECHANNEL; msg->child_relid = channel->offermsg.child_relid; - ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_close_channel)); + ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_close_channel), + true); if (ret) { pr_err("Close failed: close post msg return is %d\n", ret); diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index caf341842464..f3a8b52acb51 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -321,7 +321,8 @@ static void vmbus_release_relid(u32 relid) memset(&msg, 0, sizeof(struct vmbus_channel_relid_released)); msg.child_relid = relid; msg.header.msgtype = CHANNELMSG_RELID_RELEASED; - vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released)); + vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released), + true); } void hv_event_tasklet_disable(struct vmbus_channel *channel) @@ -728,7 +729,8 @@ void vmbus_initiate_unload(bool crash) init_completion(&vmbus_connection.unload_event); memset(&hdr, 0, sizeof(struct vmbus_channel_message_header)); hdr.msgtype = CHANNELMSG_UNLOAD; - vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header)); + vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header), + !crash); /* * vmbus_initiate_unload() is also called on crash and the crash can be @@ -1116,8 +1118,8 @@ int vmbus_request_offers(void) msg->msgtype = CHANNELMSG_REQUESTOFFERS; - ret = vmbus_post_msg(msg, - sizeof(struct vmbus_channel_message_header)); + ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header), + true); if (ret != 0) { pr_err("Unable to request offers - %d\n", ret); diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 78e6368a4423..840b6db0ea4b 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -110,7 +110,8 @@ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); ret = vmbus_post_msg(msg, - sizeof(struct vmbus_channel_initiate_contact)); + sizeof(struct vmbus_channel_initiate_contact), + true); if (ret != 0) { spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); list_del(&msginfo->msglistentry); @@ -434,7 +435,7 @@ void vmbus_on_event(unsigned long data) /* * vmbus_post_msg - Send a msg on the vmbus's message connection */ -int vmbus_post_msg(void *buffer, size_t buflen) +int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep) { union hv_connection_id conn_id; int ret = 0; @@ -449,7 +450,7 @@ int vmbus_post_msg(void *buffer, size_t buflen) * insufficient resources. Retry the operation a couple of * times before giving up. */ - while (retries < 20) { + while (retries < 100) { ret = hv_post_message(conn_id, 1, buffer, buflen); switch (ret) { @@ -472,8 +473,14 @@ int vmbus_post_msg(void *buffer, size_t buflen) } retries++; - udelay(usec); - if (usec < 2048) + if (can_sleep && usec > 1000) + msleep(usec / 1000); + else if (usec < MAX_UDELAY_MS * 1000) + udelay(usec); + else + mdelay(usec / 1000); + + if (usec < 256000) usec *= 2; } return ret; diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 2b13f2a0a71e..8d7f865c1133 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -683,7 +683,7 @@ void vmbus_free_channels(void); int vmbus_connect(void); void vmbus_disconnect(void); -int vmbus_post_msg(void *buffer, size_t buflen); +int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep); void vmbus_on_event(unsigned long data); void vmbus_on_msg_dpc(unsigned long data); From 11a4d644d6d84a34a393a389d5efd4e036b04a62 Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Thu, 5 Jan 2017 22:23:31 -0800 Subject: [PATCH 086/154] perf callchain: Reference count maps commit aa33b9b9a2ebb00d33c83a5312d4fbf2d5aeba36 upstream. If dso__load_kcore frees all of the existing maps, but one has already been attached to a callchain cursor node, then we can get a SIGSEGV in any function that happens to try to use this invalid cursor. Use the existing map refcount mechanism to forestall cleanup of a map until the cursor iterates past the node. Signed-off-by: Krister Johansen Tested-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Masami Hiramatsu Cc: Namhyung Kim Fixes: 84c2cafa2889 ("perf tools: Reference count struct map") Link: http://lkml.kernel.org/r/20170106062331.GB2707@templeofstupid.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/callchain.c | 11 +++++++++-- tools/perf/util/callchain.h | 6 ++++++ tools/perf/util/hist.c | 7 +++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index ae58b493af45..ecf6236f3b5f 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -437,7 +437,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) } call->ip = cursor_node->ip; call->ms.sym = cursor_node->sym; - call->ms.map = cursor_node->map; + call->ms.map = map__get(cursor_node->map); list_add_tail(&call->list, &node->val); callchain_cursor_advance(cursor); @@ -462,6 +462,7 @@ add_child(struct callchain_node *parent, list_for_each_entry_safe(call, tmp, &new->val, list) { list_del(&call->list); + map__zput(call->ms.map); free(call); } free(new); @@ -730,6 +731,7 @@ merge_chain_branch(struct callchain_cursor *cursor, callchain_cursor_append(cursor, list->ip, list->ms.map, list->ms.sym); list_del(&list->list); + map__zput(list->ms.map); free(list); } @@ -778,7 +780,8 @@ int callchain_cursor_append(struct callchain_cursor *cursor, } node->ip = ip; - node->map = map; + map__zput(node->map); + node->map = map__get(map); node->sym = sym; cursor->nr++; @@ -945,11 +948,13 @@ static void free_callchain_node(struct callchain_node *node) list_for_each_entry_safe(list, tmp, &node->parent_val, list) { list_del(&list->list); + map__zput(list->ms.map); free(list); } list_for_each_entry_safe(list, tmp, &node->val, list) { list_del(&list->list); + map__zput(list->ms.map); free(list); } @@ -1013,6 +1018,7 @@ int callchain_node__make_parent_list(struct callchain_node *node) goto out; *new = *chain; new->has_children = false; + map__get(new->ms.map); list_add_tail(&new->list, &head); } parent = parent->parent; @@ -1033,6 +1039,7 @@ int callchain_node__make_parent_list(struct callchain_node *node) out: list_for_each_entry_safe(chain, new, &head, list) { list_del(&chain->list); + map__zput(chain->ms.map); free(chain); } return -ENOMEM; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 47cfd1080975..b7cbabb3931f 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -5,6 +5,7 @@ #include #include #include "event.h" +#include "map.h" #include "symbol.h" #define HELP_PAD "\t\t\t\t" @@ -174,8 +175,13 @@ int callchain_merge(struct callchain_cursor *cursor, */ static inline void callchain_cursor_reset(struct callchain_cursor *cursor) { + struct callchain_cursor_node *node; + cursor->nr = 0; cursor->last = &cursor->first; + + for (node = cursor->first; node != NULL; node = node->next) + map__zput(node->map); } int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index a69f027368ef..10849a079026 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1,6 +1,7 @@ #include "util.h" #include "build-id.h" #include "hist.h" +#include "map.h" #include "session.h" #include "sort.h" #include "evlist.h" @@ -1019,6 +1020,10 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, int max_stack_depth, void *arg) { int err, err2; + struct map *alm = NULL; + + if (al && al->map) + alm = map__get(al->map); err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent, iter->evsel, al, max_stack_depth); @@ -1058,6 +1063,8 @@ out: if (!err) err = err2; + map__put(alm); + return err; } From abf74467e7fb19c957eb284798fc41cc8059dc41 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 28 Feb 2017 14:07:25 -0800 Subject: [PATCH 087/154] crypto: testmgr - Pad aes_ccm_enc_tv_template vector commit 1c68bb0f62bf8de8bb30123ea840d5168f25abea upstream. Running with KASAN and crypto tests currently gives BUG: KASAN: global-out-of-bounds in __test_aead+0x9d9/0x2200 at addr ffffffff8212fca0 Read of size 16 by task cryptomgr_test/1107 Address belongs to variable 0xffffffff8212fca0 CPU: 0 PID: 1107 Comm: cryptomgr_test Not tainted 4.10.0+ #45 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.1-1.fc24 04/01/2014 Call Trace: dump_stack+0x63/0x8a kasan_report.part.1+0x4a7/0x4e0 ? __test_aead+0x9d9/0x2200 ? crypto_ccm_init_crypt+0x218/0x3c0 [ccm] kasan_report+0x20/0x30 check_memory_region+0x13c/0x1a0 memcpy+0x23/0x50 __test_aead+0x9d9/0x2200 ? kasan_unpoison_shadow+0x35/0x50 ? alg_test_akcipher+0xf0/0xf0 ? crypto_skcipher_init_tfm+0x2e3/0x310 ? crypto_spawn_tfm2+0x37/0x60 ? crypto_ccm_init_tfm+0xa9/0xd0 [ccm] ? crypto_aead_init_tfm+0x7b/0x90 ? crypto_alloc_tfm+0xc4/0x190 test_aead+0x28/0xc0 alg_test_aead+0x54/0xd0 alg_test+0x1eb/0x3d0 ? alg_find_test+0x90/0x90 ? __sched_text_start+0x8/0x8 ? __wake_up_common+0x70/0xb0 cryptomgr_test+0x4d/0x60 kthread+0x173/0x1c0 ? crypto_acomp_scomp_free_ctx+0x60/0x60 ? kthread_create_on_node+0xa0/0xa0 ret_from_fork+0x2c/0x40 Memory state around the buggy address: ffffffff8212fb80: 00 00 00 00 01 fa fa fa fa fa fa fa 00 00 00 00 ffffffff8212fc00: 00 01 fa fa fa fa fa fa 00 00 00 00 01 fa fa fa >ffffffff8212fc80: fa fa fa fa 00 05 fa fa fa fa fa fa 00 00 00 00 ^ ffffffff8212fd00: 01 fa fa fa fa fa fa fa 00 00 00 00 01 fa fa fa ffffffff8212fd80: fa fa fa fa 00 00 00 00 00 05 fa fa fa fa fa fa This always happens on the same IV which is less than 16 bytes. Per Ard, "CCM IVs are 16 bytes, but due to the way they are constructed internally, the final couple of bytes of input IV are dont-cares. Apparently, we do read all 16 bytes, which triggers the KASAN errors." Fix this by padding the IV with null bytes to be at least 16 bytes. Fixes: 0bc5a6c5c79a ("crypto: testmgr - Disable rfc4309 test and convert test vectors") Acked-by: Ard Biesheuvel Signed-off-by: Laura Abbott Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/testmgr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/testmgr.h b/crypto/testmgr.h index e64a4ef9d8ca..9033088ca231 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -22813,7 +22813,7 @@ static struct aead_testvec aes_ccm_enc_tv_template[] = { "\x09\x75\x9a\x9b\x3c\x9b\x27\x39", .klen = 32, .iv = "\x03\xf9\xd9\x4e\x63\xb5\x3d\x9d" - "\x43\xf6\x1e\x50", + "\x43\xf6\x1e\x50\0\0\0\0", .assoc = "\x57\xf5\x6b\x8b\x57\x5c\x3d\x3b" "\x13\x02\x01\x0c\x83\x4c\x96\x35" "\x8e\xd6\x39\xcf\x7d\x14\x9b\x94" From 1e6be9c19c1286cdc3e87a27b06038361a0165ab Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 22 Feb 2017 20:08:25 +0100 Subject: [PATCH 088/154] fuse: add missing FR_FORCE commit 2e38bea99a80eab408adee27f873a188d57b76cb upstream. fuse_file_put() was missing the "force" flag for the RELEASE request when sending synchronously (fuseblk). If this flag is not set, then a sync request may be interrupted before it is dequeued by the userspace filesystem. In this case the OPEN won't be balanced with a RELEASE. Signed-off-by: Miklos Szeredi Fixes: 5a18ec176c93 ("fuse: fix hang of single threaded fuseblk filesystem") Signed-off-by: Greg Kroah-Hartman --- fs/fuse/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 2401c5dabb2a..5ec5870e423a 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -100,6 +100,7 @@ static void fuse_file_put(struct fuse_file *ff, bool sync) iput(req->misc.release.inode); fuse_put_request(ff->fc, req); } else if (sync) { + __set_bit(FR_FORCE, &req->flags); __clear_bit(FR_BACKGROUND, &req->flags); fuse_request_send(ff->fc, req); iput(req->misc.release.inode); From bfb55d4087cfc4346b1ec5a04cbac04e9df10ab4 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 23 Feb 2017 14:26:03 -0800 Subject: [PATCH 089/154] x86/pkeys: Check against max pkey to avoid overflows commit 58ab9a088ddac4efe823471275859d64f735577e upstream. Kirill reported a warning from UBSAN about undefined behavior when using protection keys. He is running on hardware that actually has support for it, which is not widely available. The warning triggers because of very large shifts of integers when doing a pkey_free() of a large, invalid value. This happens because we never check that the pkey "fits" into the mm_pkey_allocation_map(). I do not believe there is any danger here of anything bad happening other than some aliasing issues where somebody could do: pkey_free(35); and the kernel would effectively execute: pkey_free(8); While this might be confusing to an app that was doing something stupid, it has to do something stupid and the effects are limited to the app shooting itself in the foot. Signed-off-by: Dave Hansen Cc: linux-kselftest@vger.kernel.org Cc: shuah@kernel.org Cc: kirill.shutemov@linux.intel.com Link: http://lkml.kernel.org/r/20170223222603.A022ED65@viggo.jf.intel.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pkeys.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index 34684adb6899..b3b09b98896d 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -46,6 +46,15 @@ extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) { + /* + * "Allocated" pkeys are those that have been returned + * from pkey_alloc(). pkey 0 is special, and never + * returned from pkey_alloc(). + */ + if (pkey <= 0) + return false; + if (pkey >= arch_max_pkey()) + return false; return mm_pkey_allocation_map(mm) & (1U << pkey); } @@ -82,12 +91,6 @@ int mm_pkey_alloc(struct mm_struct *mm) static inline int mm_pkey_free(struct mm_struct *mm, int pkey) { - /* - * pkey 0 is special, always allocated and can never - * be freed. - */ - if (!pkey) - return -EINVAL; if (!mm_pkey_is_allocated(mm, pkey)) return -EINVAL; From ac4c8fcf5ebceb0a2b6342707e4835a13f5bcf22 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 25 Jan 2017 12:29:59 +0000 Subject: [PATCH 090/154] arm/arm64: KVM: Enforce unconditional flush to PoC when mapping to stage-2 commit 8f36ebaf21fdae99c091c67e8b6fab33969f2667 upstream. When we fault in a page, we flush it to the PoC (Point of Coherency) if the faulting vcpu has its own caches off, so that it can observe the page we just brought it. But if the vcpu has its caches on, we skip that step. Bad things happen when *another* vcpu tries to access that page with its own caches disabled. At that point, there is no garantee that the data has made it to the PoC, and we access stale data. The obvious fix is to always flush to PoC when a page is faulted in, no matter what the state of the vcpu is. Fixes: 2d58b733c876 ("arm64: KVM: force cache clean on page fault when caches are off") Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/kvm_mmu.h | 9 +-------- arch/arm64/include/asm/kvm_mmu.h | 3 +-- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 74a44727f8e1..a58bbaa3ec60 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -150,18 +150,12 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, * and iterate over the range. */ - bool need_flush = !vcpu_has_cache_enabled(vcpu) || ipa_uncached; - VM_BUG_ON(size & ~PAGE_MASK); - if (!need_flush && !icache_is_pipt()) - goto vipt_cache; - while (size) { void *va = kmap_atomic_pfn(pfn); - if (need_flush) - kvm_flush_dcache_to_poc(va, PAGE_SIZE); + kvm_flush_dcache_to_poc(va, PAGE_SIZE); if (icache_is_pipt()) __cpuc_coherent_user_range((unsigned long)va, @@ -173,7 +167,6 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, kunmap_atomic(va); } -vipt_cache: if (!icache_is_pipt() && !icache_is_vivt_asid_tagged()) { /* any kind of VIPT cache */ __flush_icache_all(); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 6f72fe8b0e3e..6d22017ebbad 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -241,8 +241,7 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, { void *va = page_address(pfn_to_page(pfn)); - if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached) - kvm_flush_dcache_to_poc(va, size); + kvm_flush_dcache_to_poc(va, size); if (!icache_is_aliasing()) { /* PIPT */ flush_icache_range((unsigned long)va, From a8123045c94638114a0b1ab19fd218dd3a5cd0b5 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 25 Jan 2017 18:31:31 +0000 Subject: [PATCH 091/154] arm64: dma-mapping: Fix dma_mapping_error() when bypassing SWIOTLB commit adbe7e26f4257f72817495b9bce114284060b0d7 upstream. When bypassing SWIOTLB on small-memory systems, we need to avoid calling into swiotlb_dma_mapping_error() in exactly the same way as we avoid swiotlb_dma_supported(), because the former also relies on SWIOTLB state being initialised. Under the assumptions for which we skip SWIOTLB, dma_map_{single,page}() will only ever return the DMA-offset-adjusted physical address of the page passed in, thus we can report success unconditionally. Fixes: b67a8b29df7e ("arm64: mm: only initialize swiotlb when necessary") CC: Jisheng Zhang Reported-by: Aaro Koskinen Signed-off-by: Robin Murphy Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/dma-mapping.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 02265a589ef5..b5bf46ce873b 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -352,6 +352,13 @@ static int __swiotlb_dma_supported(struct device *hwdev, u64 mask) return 1; } +static int __swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t addr) +{ + if (swiotlb) + return swiotlb_dma_mapping_error(hwdev, addr); + return 0; +} + static struct dma_map_ops swiotlb_dma_ops = { .alloc = __dma_alloc, .free = __dma_free, @@ -366,7 +373,7 @@ static struct dma_map_ops swiotlb_dma_ops = { .sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu, .sync_sg_for_device = __swiotlb_sync_sg_for_device, .dma_supported = __swiotlb_dma_supported, - .mapping_error = swiotlb_dma_mapping_error, + .mapping_error = __swiotlb_dma_mapping_error, }; static int __init atomic_pool_init(void) From 7127d43e1843400658e42c0b21dc598a59d603e4 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 2 Feb 2017 17:32:14 +0000 Subject: [PATCH 092/154] arm64: fix erroneous __raw_read_system_reg() cases commit 7d0928f18bf890d2853281f59aba0dd5a46b34f9 upstream. Since it was introduced in commit da8d02d19ffdd201 ("arm64/capabilities: Make use of system wide safe value"), __raw_read_system_reg() has erroneously mapped some sysreg IDs to other registers. For the fields in ID_ISAR5_EL1, our local feature detection will be erroneous. We may spuriously detect that a feature is uniformly supported, or may fail to detect when it actually is, meaning some compat hwcaps may be erroneous (or not enforced upon hotplug). This patch corrects the erroneous entries. Signed-off-by: Mark Rutland Fixes: da8d02d19ffdd201 ("arm64/capabilities: Make use of system wide safe value") Reported-by: Catalin Marinas Cc: Suzuki K Poulose Cc: Will Deacon Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpufeature.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c02504ea304b..3a129d48674e 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -653,15 +653,15 @@ static u64 __raw_read_system_reg(u32 sys_id) case SYS_ID_ISAR2_EL1: return read_cpuid(ID_ISAR2_EL1); case SYS_ID_ISAR3_EL1: return read_cpuid(ID_ISAR3_EL1); case SYS_ID_ISAR4_EL1: return read_cpuid(ID_ISAR4_EL1); - case SYS_ID_ISAR5_EL1: return read_cpuid(ID_ISAR4_EL1); + case SYS_ID_ISAR5_EL1: return read_cpuid(ID_ISAR5_EL1); case SYS_MVFR0_EL1: return read_cpuid(MVFR0_EL1); case SYS_MVFR1_EL1: return read_cpuid(MVFR1_EL1); case SYS_MVFR2_EL1: return read_cpuid(MVFR2_EL1); case SYS_ID_AA64PFR0_EL1: return read_cpuid(ID_AA64PFR0_EL1); - case SYS_ID_AA64PFR1_EL1: return read_cpuid(ID_AA64PFR0_EL1); + case SYS_ID_AA64PFR1_EL1: return read_cpuid(ID_AA64PFR1_EL1); case SYS_ID_AA64DFR0_EL1: return read_cpuid(ID_AA64DFR0_EL1); - case SYS_ID_AA64DFR1_EL1: return read_cpuid(ID_AA64DFR0_EL1); + case SYS_ID_AA64DFR1_EL1: return read_cpuid(ID_AA64DFR1_EL1); case SYS_ID_AA64MMFR0_EL1: return read_cpuid(ID_AA64MMFR0_EL1); case SYS_ID_AA64MMFR1_EL1: return read_cpuid(ID_AA64MMFR1_EL1); case SYS_ID_AA64MMFR2_EL1: return read_cpuid(ID_AA64MMFR2_EL1); From d408d23addbaa9af9e0db492f774d3245ecfeefd Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Thu, 2 Feb 2017 20:30:03 -0600 Subject: [PATCH 093/154] KVM: arm/arm64: vgic: Stop injecting the MSI occurrence twice commit 0bdbf3b071986ba80731203683cf623d5c0cacb1 upstream. The IRQFD framework calls the architecture dependent function twice if the corresponding GSI type is edge triggered. For ARM, the function kvm_set_msi() is getting called twice whenever the IRQFD receives the event signal. The rest of the code path is trying to inject the MSI without any validation checks. No need to call the function vgic_its_inject_msi() second time to avoid an unnecessary overhead in IRQ queue logic. It also avoids the possibility of VM seeing the MSI twice. Simple fix, return -1 if the argument 'level' value is zero. Reviewed-by: Eric Auger Reviewed-by: Christoffer Dall Signed-off-by: Shanker Donthineni Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/vgic/vgic-irqfd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c index d918dcf26a5a..f138ed2e9c63 100644 --- a/virt/kvm/arm/vgic/vgic-irqfd.c +++ b/virt/kvm/arm/vgic/vgic-irqfd.c @@ -99,6 +99,9 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, if (!vgic_has_its(kvm)) return -ENODEV; + if (!level) + return -1; + return vgic_its_inject_msi(kvm, &msi); } From 602bd10f6e947308ffa7f9c2d11d07b9997aaee5 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 1 Feb 2017 21:40:57 +0100 Subject: [PATCH 094/154] iio: pressure: mpl115: do not rely on structure field ordering commit 6a6e1d56a0769795a36c0461c64bf5e5b9bbb4c0 upstream. Fixes a regression triggered by a change in the layout of struct iio_chan_spec, but the real bug is in the driver which assumed a specific structure layout in the first place. Hint: the three bits were not OR:ed together as implied by the indentation prior to this patch, there was a comma between the first two, which accidentally moved the ..._SCALE and ..._OFFSET bits to the next structure field. That field was .info_mask_shared_by_type before the _available attributes was added by commit 51239600074b ("iio:core: add a callback to allow drivers to provide _available attributes") and .info_mask_separate_available afterwards, and the regression happened. info_mask_shared_by_type is actually a better choice than the originally intended info_mask_separate for the ..._SCALE and ..._OFFSET bits since a constant is returned from mpl115_read_raw for the scale/offset. Using info_mask_shared_by_type also preserves the behavior from before the regression and is therefore less likely to cause other interesting side effects. The above mentioned regression causes unintended sysfs attibutes to show up that are not backed by code, in turn causing a NULL pointer defererence to happen on access. Fixes: 3017d90e8931 ("iio: Add Freescale MPL115A2 pressure / temperature sensor driver") Fixes: 51239600074b ("iio:core: add a callback to allow drivers to provide _available attributes") Signed-off-by: Peter Rosin Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/pressure/mpl115.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/pressure/mpl115.c b/drivers/iio/pressure/mpl115.c index 73f2f0c46e62..8f2bce213248 100644 --- a/drivers/iio/pressure/mpl115.c +++ b/drivers/iio/pressure/mpl115.c @@ -137,6 +137,7 @@ static const struct iio_chan_spec mpl115_channels[] = { { .type = IIO_TEMP, .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_OFFSET) | BIT(IIO_CHAN_INFO_SCALE), }, }; From a34546c6234e77dc98f5ac40ad64ee5429c986e6 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 1 Feb 2017 21:40:56 +0100 Subject: [PATCH 095/154] iio: pressure: mpl3115: do not rely on structure field ordering commit 9cf6cdba586ced75c69b8314b88b2d2f5ce9b3ed upstream. Fixes a regression triggered by a change in the layout of struct iio_chan_spec, but the real bug is in the driver which assumed a specific structure layout in the first place. Hint: the two bits were not OR:ed together as implied by the indentation prior to this patch, there was a comma between them, which accidentally moved the ..._SCALE bit to the next structure field. That field was .info_mask_shared_by_type before the _available attributes was added by commit 51239600074b ("iio:core: add a callback to allow drivers to provide _available attributes") and .info_mask_separate_available afterwards, and the regression happened. info_mask_shared_by_type is actually a better choice than the originally intended info_mask_separate for the ..._SCALE bit since a constant is returned from mpl3115_read_raw for the scale. Using info_mask_shared_by_type also preserves the behavior from before the regression and is therefore less likely to cause other interesting side effects. The above mentioned regression causes an unintended sysfs attibute to show up that is not backed by code, in turn causing the following NULL pointer defererence to happen on access. Segmentation fault Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = ecc3c000 [00000000] *pgd=87f91831 Internal error: Oops: 80000007 [#1] SMP ARM Modules linked in: CPU: 1 PID: 1051 Comm: cat Not tainted 4.10.0-rc5-00009-gffd8858-dirty #3 Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) task: ed54ec00 task.stack: ee2bc000 PC is at 0x0 LR is at iio_read_channel_info_avail+0x40/0x280 pc : [<00000000>] lr : [] psr: a0070013 sp : ee2bdda8 ip : 00000000 fp : ee2bddf4 r10: c0a53c74 r9 : ed79f000 r8 : ee8d1018 r7 : 00001000 r6 : 00000fff r5 : ee8b9a00 r4 : ed79f000 r3 : ee2bddc4 r2 : ee2bddbc r1 : c0a86dcc r0 : ee8d1000 Flags: NzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5387d Table: 3cc3c04a DAC: 00000051 Process cat (pid: 1051, stack limit = 0xee2bc210) Stack: (0xee2bdda8 to 0xee2be000) dda0: ee2bddc0 00000002 c016d720 c016d394 ed54ec00 00000000 ddc0: 60070013 ed413780 00000001 edffd480 ee8b9a00 00000fff 00001000 ee8d1018 dde0: ed79f000 c0a53c74 ee2bde0c ee2bddf8 c0513c58 c06fbbe8 edffd480 edffd540 de00: ee2bde3c ee2bde10 c0293474 c0513c40 c02933e4 ee2bde60 00000001 ed413780 de20: 00000001 ed413780 00000000 edffd480 ee2bde4c ee2bde40 c0291d00 c02933f0 de40: ee2bde9c ee2bde50 c024679c c0291ce0 edffd4b0 b6e37000 00020000 ee2bdf78 de60: 00000000 00000000 ed54ec00 ed013200 00000817 c0a111fc edffd540 ed413780 de80: b6e37000 00020000 00020000 ee2bdf78 ee2bded4 ee2bdea0 c0292890 c0246604 dea0: c0117940 c016ba50 00000025 c0a111fc b6e37000 ed413780 ee2bdf78 00020000 dec0: ee2bc000 b6e37000 ee2bdf44 ee2bded8 c021d158 c0292770 c0117764 b6e36004 dee0: c0f0d7c4 ee2bdfb0 b6f89228 00021008 ee2bdfac ee2bdf00 c0101374 c0117770 df00: 00000000 00000000 ee2bc000 00000000 ee2bdf34 ee2bdf20 c016ba04 c0171080 df20: 00000000 00020000 ed413780 b6e37000 00000000 ee2bdf78 ee2bdf74 ee2bdf48 df40: c021e7a0 c021d130 c023e300 c023e280 ee2bdf74 00000000 00000000 ed413780 df60: ed413780 00020000 ee2bdfa4 ee2bdf78 c021e870 c021e71c 00000000 00000000 df80: 00020000 00020000 b6e37000 00000003 c0108084 00000000 00000000 ee2bdfa8 dfa0: c0107ee0 c021e838 00020000 00020000 00000003 b6e37000 00020000 0001a2b4 dfc0: 00020000 00020000 b6e37000 00000003 7fffe000 00000000 00000000 00020000 dfe0: 00000000 be98eb4c 0000c740 b6f1985c 60070010 00000003 00000000 00000000 Backtrace: [] (iio_read_channel_info_avail) from [] (dev_attr_show+0x24/0x50) r10:c0a53c74 r9:ed79f000 r8:ee8d1018 r7:00001000 r6:00000fff r5:ee8b9a00 r4:edffd480 [] (dev_attr_show) from [] (sysfs_kf_seq_show+0x90/0x110) r5:edffd540 r4:edffd480 [] (sysfs_kf_seq_show) from [] (kernfs_seq_show+0x2c/0x30) r10:edffd480 r9:00000000 r8:ed413780 r7:00000001 r6:ed413780 r5:00000001 r4:ee2bde60 r3:c02933e4 [] (kernfs_seq_show) from [] (seq_read+0x1a4/0x4e0) [] (seq_read) from [] (kernfs_fop_read+0x12c/0x1cc) r10:ee2bdf78 r9:00020000 r8:00020000 r7:b6e37000 r6:ed413780 r5:edffd540 r4:c0a111fc [] (kernfs_fop_read) from [] (__vfs_read+0x34/0x118) r10:b6e37000 r9:ee2bc000 r8:00020000 r7:ee2bdf78 r6:ed413780 r5:b6e37000 r4:c0a111fc [] (__vfs_read) from [] (vfs_read+0x90/0x11c) r8:ee2bdf78 r7:00000000 r6:b6e37000 r5:ed413780 r4:00020000 [] (vfs_read) from [] (SyS_read+0x44/0x90) r8:00020000 r7:ed413780 r6:ed413780 r5:00000000 r4:00000000 [] (SyS_read) from [] (ret_fast_syscall+0x0/0x1c) r10:00000000 r8:c0108084 r7:00000003 r6:b6e37000 r5:00020000 r4:00020000 Code: bad PC value ---[ end trace 9c4938ccd0389004 ]--- Fixes: cc26ad455f57 ("iio: Add Freescale MPL3115A2 pressure / temperature sensor driver") Fixes: 51239600074b ("iio:core: add a callback to allow drivers to provide _available attributes") Reported-by: Ken Lin Tested-by: Ken Lin Signed-off-by: Peter Rosin Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/pressure/mpl3115.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/pressure/mpl3115.c b/drivers/iio/pressure/mpl3115.c index 6392d7b62841..eb87948fc559 100644 --- a/drivers/iio/pressure/mpl3115.c +++ b/drivers/iio/pressure/mpl3115.c @@ -182,7 +182,7 @@ static const struct iio_chan_spec mpl3115_channels[] = { { .type = IIO_PRESSURE, .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), - BIT(IIO_CHAN_INFO_SCALE), + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), .scan_index = 0, .scan_type = { .sign = 'u', @@ -195,7 +195,7 @@ static const struct iio_chan_spec mpl3115_channels[] = { { .type = IIO_TEMP, .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), - BIT(IIO_CHAN_INFO_SCALE), + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), .scan_index = 1, .scan_type = { .sign = 's', From cec7abd27e878e3c83dc9af41ee87a2e9d483ac0 Mon Sep 17 00:00:00 2001 From: Ethan Zonca Date: Fri, 24 Feb 2017 11:27:36 -0500 Subject: [PATCH 096/154] can: gs_usb: Don't use stack memory for USB transfers commit c919a3069c775c1c876bec55e00b2305d5125caa upstream. Fixes: 05ca5270005c can: gs_usb: add ethtool set_phys_id callback to locate physical device The gs_usb driver is performing USB transfers using buffers allocated on the stack. This causes the driver to not function with vmapped stacks. Instead, allocate memory for the transfer buffers. Signed-off-by: Ethan Zonca Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/gs_usb.c | 40 ++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 77e3cc06a30c..a0dabd4038ba 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -908,10 +908,14 @@ static int gs_usb_probe(struct usb_interface *intf, struct gs_usb *dev; int rc = -ENOMEM; unsigned int icount, i; - struct gs_host_config hconf = { - .byte_order = 0x0000beef, - }; - struct gs_device_config dconf; + struct gs_host_config *hconf; + struct gs_device_config *dconf; + + hconf = kmalloc(sizeof(*hconf), GFP_KERNEL); + if (!hconf) + return -ENOMEM; + + hconf->byte_order = 0x0000beef; /* send host config */ rc = usb_control_msg(interface_to_usbdev(intf), @@ -920,16 +924,22 @@ static int gs_usb_probe(struct usb_interface *intf, USB_DIR_OUT|USB_TYPE_VENDOR|USB_RECIP_INTERFACE, 1, intf->altsetting[0].desc.bInterfaceNumber, - &hconf, - sizeof(hconf), + hconf, + sizeof(*hconf), 1000); + kfree(hconf); + if (rc < 0) { dev_err(&intf->dev, "Couldn't send data format (err=%d)\n", rc); return rc; } + dconf = kmalloc(sizeof(*dconf), GFP_KERNEL); + if (!dconf) + return -ENOMEM; + /* read device config */ rc = usb_control_msg(interface_to_usbdev(intf), usb_rcvctrlpipe(interface_to_usbdev(intf), 0), @@ -937,28 +947,33 @@ static int gs_usb_probe(struct usb_interface *intf, USB_DIR_IN|USB_TYPE_VENDOR|USB_RECIP_INTERFACE, 1, intf->altsetting[0].desc.bInterfaceNumber, - &dconf, - sizeof(dconf), + dconf, + sizeof(*dconf), 1000); if (rc < 0) { dev_err(&intf->dev, "Couldn't get device config: (err=%d)\n", rc); + kfree(dconf); return rc; } - icount = dconf.icount + 1; + icount = dconf->icount + 1; dev_info(&intf->dev, "Configuring for %d interfaces\n", icount); if (icount > GS_MAX_INTF) { dev_err(&intf->dev, "Driver cannot handle more that %d CAN interfaces\n", GS_MAX_INTF); + kfree(dconf); return -EINVAL; } dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) + if (!dev) { + kfree(dconf); return -ENOMEM; + } + init_usb_anchor(&dev->rx_submitted); atomic_set(&dev->active_channels, 0); @@ -967,7 +982,7 @@ static int gs_usb_probe(struct usb_interface *intf, dev->udev = interface_to_usbdev(intf); for (i = 0; i < icount; i++) { - dev->canch[i] = gs_make_candev(i, intf, &dconf); + dev->canch[i] = gs_make_candev(i, intf, dconf); if (IS_ERR_OR_NULL(dev->canch[i])) { /* save error code to return later */ rc = PTR_ERR(dev->canch[i]); @@ -978,12 +993,15 @@ static int gs_usb_probe(struct usb_interface *intf, gs_destroy_candev(dev->canch[i]); usb_kill_anchored_urbs(&dev->rx_submitted); + kfree(dconf); kfree(dev); return rc; } dev->canch[i]->parent = dev; } + kfree(dconf); + return 0; } From 7afc0ee6ae73c23d8acc7a58113e269f6c1da2a2 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 2 Mar 2017 12:03:40 +0100 Subject: [PATCH 097/154] can: usb_8dev: Fix memory leak of priv->cmd_msg_buffer commit 7c42631376306fb3f34d51fda546b50a9b6dd6ec upstream. The priv->cmd_msg_buffer is allocated in the probe function, but never kfree()ed. This patch converts the kzalloc() to resource-managed kzalloc. Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/usb_8dev.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index 108a30e15097..d000cb62d6ae 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -951,8 +951,8 @@ static int usb_8dev_probe(struct usb_interface *intf, for (i = 0; i < MAX_TX_URBS; i++) priv->tx_contexts[i].echo_index = MAX_TX_URBS; - priv->cmd_msg_buffer = kzalloc(sizeof(struct usb_8dev_cmd_msg), - GFP_KERNEL); + priv->cmd_msg_buffer = devm_kzalloc(&intf->dev, sizeof(struct usb_8dev_cmd_msg), + GFP_KERNEL); if (!priv->cmd_msg_buffer) goto cleanup_candev; @@ -966,7 +966,7 @@ static int usb_8dev_probe(struct usb_interface *intf, if (err) { netdev_err(netdev, "couldn't register CAN device: %d\n", err); - goto cleanup_cmd_msg_buffer; + goto cleanup_candev; } err = usb_8dev_cmd_version(priv, &version); @@ -987,9 +987,6 @@ static int usb_8dev_probe(struct usb_interface *intf, cleanup_unregister_candev: unregister_netdev(priv->netdev); -cleanup_cmd_msg_buffer: - kfree(priv->cmd_msg_buffer); - cleanup_candev: free_candev(netdev); From 1de86951101be4a3bcee9355a10f327bba491ec5 Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Sat, 21 Jan 2017 23:50:18 +0100 Subject: [PATCH 098/154] w1: don't leak refcount on slave attach failure in w1_attach_slave_device() commit d2ce4ea1a0b0162e5d2d7e7942ab6f5cc2063d5a upstream. Near the beginning of w1_attach_slave_device() we increment a w1 master reference count. Later, when we are going to exit this function without actually attaching a slave device (due to failure of __w1_attach_slave_device()) we need to decrement this reference count back. Signed-off-by: Maciej S. Szmigiero Fixes: 9fcbbac5ded489 ("w1: process w1 netlink commands in w1_process thread") Cc: Evgeniy Polyakov Signed-off-by: Greg Kroah-Hartman --- drivers/w1/w1.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c index e213c678bbfe..ab0931e7a9bb 100644 --- a/drivers/w1/w1.c +++ b/drivers/w1/w1.c @@ -763,6 +763,7 @@ int w1_attach_slave_device(struct w1_master *dev, struct w1_reg_num *rn) dev_err(&dev->dev, "%s: Attaching %s failed.\n", __func__, sl->name); w1_family_put(sl->family); + atomic_dec(&sl->master->refcnt); kfree(sl); return err; } From c259832acf57c51971582c84c89b2941a8a9e1ef Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Wed, 18 Jan 2017 21:31:11 +0100 Subject: [PATCH 099/154] w1: ds2490: USB transfer buffers need to be DMAable commit 61cd1b4cd1e8f7f7642ab64529d9bd52e8374641 upstream. ds2490 driver was doing USB transfers from / to buffers on a stack. This is not permitted and made the driver non-working with vmapped stacks. Since all these transfers are done under the same bus_mutex lock we can simply use shared buffers in a device private structure for two most common of them. While we are at it, let's also fix a comparison between int and size_t in ds9490r_search() which made the driver spin in this function if state register get requests were failing. Signed-off-by: Maciej S. Szmigiero Acked-by: Evgeniy Polyakov Signed-off-by: Greg Kroah-Hartman --- drivers/w1/masters/ds2490.c | 142 +++++++++++++++++++++--------------- 1 file changed, 84 insertions(+), 58 deletions(-) diff --git a/drivers/w1/masters/ds2490.c b/drivers/w1/masters/ds2490.c index 049a884a756f..59d74d1b47a8 100644 --- a/drivers/w1/masters/ds2490.c +++ b/drivers/w1/masters/ds2490.c @@ -153,6 +153,9 @@ struct ds_device */ u16 spu_bit; + u8 st_buf[ST_SIZE]; + u8 byte_buf; + struct w1_bus_master master; }; @@ -174,7 +177,6 @@ struct ds_status u8 data_in_buffer_status; u8 reserved1; u8 reserved2; - }; static struct usb_device_id ds_id_table [] = { @@ -244,28 +246,6 @@ static int ds_send_control(struct ds_device *dev, u16 value, u16 index) return err; } -static int ds_recv_status_nodump(struct ds_device *dev, struct ds_status *st, - unsigned char *buf, int size) -{ - int count, err; - - memset(st, 0, sizeof(*st)); - - count = 0; - err = usb_interrupt_msg(dev->udev, usb_rcvintpipe(dev->udev, - dev->ep[EP_STATUS]), buf, size, &count, 1000); - if (err < 0) { - pr_err("Failed to read 1-wire data from 0x%x: err=%d.\n", - dev->ep[EP_STATUS], err); - return err; - } - - if (count >= sizeof(*st)) - memcpy(st, buf, sizeof(*st)); - - return count; -} - static inline void ds_print_msg(unsigned char *buf, unsigned char *str, int off) { pr_info("%45s: %8x\n", str, buf[off]); @@ -324,6 +304,35 @@ static void ds_dump_status(struct ds_device *dev, unsigned char *buf, int count) } } +static int ds_recv_status(struct ds_device *dev, struct ds_status *st, + bool dump) +{ + int count, err; + + if (st) + memset(st, 0, sizeof(*st)); + + count = 0; + err = usb_interrupt_msg(dev->udev, + usb_rcvintpipe(dev->udev, + dev->ep[EP_STATUS]), + dev->st_buf, sizeof(dev->st_buf), + &count, 1000); + if (err < 0) { + pr_err("Failed to read 1-wire data from 0x%x: err=%d.\n", + dev->ep[EP_STATUS], err); + return err; + } + + if (dump) + ds_dump_status(dev, dev->st_buf, count); + + if (st && count >= sizeof(*st)) + memcpy(st, dev->st_buf, sizeof(*st)); + + return count; +} + static void ds_reset_device(struct ds_device *dev) { ds_send_control_cmd(dev, CTL_RESET_DEVICE, 0); @@ -344,7 +353,6 @@ static void ds_reset_device(struct ds_device *dev) static int ds_recv_data(struct ds_device *dev, unsigned char *buf, int size) { int count, err; - struct ds_status st; /* Careful on size. If size is less than what is available in * the input buffer, the device fails the bulk transfer and @@ -359,14 +367,9 @@ static int ds_recv_data(struct ds_device *dev, unsigned char *buf, int size) err = usb_bulk_msg(dev->udev, usb_rcvbulkpipe(dev->udev, dev->ep[EP_DATA_IN]), buf, size, &count, 1000); if (err < 0) { - u8 buf[ST_SIZE]; - int count; - pr_info("Clearing ep0x%x.\n", dev->ep[EP_DATA_IN]); usb_clear_halt(dev->udev, usb_rcvbulkpipe(dev->udev, dev->ep[EP_DATA_IN])); - - count = ds_recv_status_nodump(dev, &st, buf, sizeof(buf)); - ds_dump_status(dev, buf, count); + ds_recv_status(dev, NULL, true); return err; } @@ -404,7 +407,6 @@ int ds_stop_pulse(struct ds_device *dev, int limit) { struct ds_status st; int count = 0, err = 0; - u8 buf[ST_SIZE]; do { err = ds_send_control(dev, CTL_HALT_EXE_IDLE, 0); @@ -413,7 +415,7 @@ int ds_stop_pulse(struct ds_device *dev, int limit) err = ds_send_control(dev, CTL_RESUME_EXE, 0); if (err) break; - err = ds_recv_status_nodump(dev, &st, buf, sizeof(buf)); + err = ds_recv_status(dev, &st, false); if (err) break; @@ -456,18 +458,17 @@ int ds_detect(struct ds_device *dev, struct ds_status *st) static int ds_wait_status(struct ds_device *dev, struct ds_status *st) { - u8 buf[ST_SIZE]; int err, count = 0; do { st->status = 0; - err = ds_recv_status_nodump(dev, st, buf, sizeof(buf)); + err = ds_recv_status(dev, st, false); #if 0 if (err >= 0) { int i; printk("0x%x: count=%d, status: ", dev->ep[EP_STATUS], err); for (i=0; ist_buf[i]); printk("\n"); } #endif @@ -485,7 +486,7 @@ static int ds_wait_status(struct ds_device *dev, struct ds_status *st) * can do something with it). */ if (err > 16 || count >= 100 || err < 0) - ds_dump_status(dev, buf, err); + ds_dump_status(dev, dev->st_buf, err); /* Extended data isn't an error. Well, a short is, but the dump * would have already told the user that and we can't do anything @@ -608,7 +609,6 @@ static int ds_write_byte(struct ds_device *dev, u8 byte) { int err; struct ds_status st; - u8 rbyte; err = ds_send_control(dev, COMM_BYTE_IO | COMM_IM | dev->spu_bit, byte); if (err) @@ -621,11 +621,11 @@ static int ds_write_byte(struct ds_device *dev, u8 byte) if (err) return err; - err = ds_recv_data(dev, &rbyte, sizeof(rbyte)); + err = ds_recv_data(dev, &dev->byte_buf, 1); if (err < 0) return err; - return !(byte == rbyte); + return !(byte == dev->byte_buf); } static int ds_read_byte(struct ds_device *dev, u8 *byte) @@ -712,7 +712,6 @@ static void ds9490r_search(void *data, struct w1_master *master, int err; u16 value, index; struct ds_status st; - u8 st_buf[ST_SIZE]; int search_limit; int found = 0; int i; @@ -724,7 +723,12 @@ static void ds9490r_search(void *data, struct w1_master *master, /* FIFO 128 bytes, bulk packet size 64, read a multiple of the * packet size. */ - u64 buf[2*64/8]; + const size_t bufsize = 2 * 64; + u64 *buf; + + buf = kmalloc(bufsize, GFP_KERNEL); + if (!buf) + return; mutex_lock(&master->bus_mutex); @@ -745,10 +749,9 @@ static void ds9490r_search(void *data, struct w1_master *master, do { schedule_timeout(jtime); - if (ds_recv_status_nodump(dev, &st, st_buf, sizeof(st_buf)) < - sizeof(st)) { + err = ds_recv_status(dev, &st, false); + if (err < 0 || err < sizeof(st)) break; - } if (st.data_in_buffer_status) { /* Bulk in can receive partial ids, but when it does @@ -758,7 +761,7 @@ static void ds9490r_search(void *data, struct w1_master *master, * bulk without first checking if status says there * is data to read. */ - err = ds_recv_data(dev, (u8 *)buf, sizeof(buf)); + err = ds_recv_data(dev, (u8 *)buf, bufsize); if (err < 0) break; for (i = 0; i < err/8; ++i) { @@ -794,9 +797,14 @@ static void ds9490r_search(void *data, struct w1_master *master, } search_out: mutex_unlock(&master->bus_mutex); + kfree(buf); } #if 0 +/* + * FIXME: if this disabled code is ever used in the future all ds_send_data() + * calls must be changed to use a DMAable buffer. + */ static int ds_match_access(struct ds_device *dev, u64 init) { int err; @@ -845,13 +853,12 @@ static int ds_set_path(struct ds_device *dev, u64 init) static u8 ds9490r_touch_bit(void *data, u8 bit) { - u8 ret; struct ds_device *dev = data; - if (ds_touch_bit(dev, bit, &ret)) + if (ds_touch_bit(dev, bit, &dev->byte_buf)) return 0; - return ret; + return dev->byte_buf; } #if 0 @@ -866,13 +873,12 @@ static u8 ds9490r_read_bit(void *data) { struct ds_device *dev = data; int err; - u8 bit = 0; - err = ds_touch_bit(dev, 1, &bit); + err = ds_touch_bit(dev, 1, &dev->byte_buf); if (err) return 0; - return bit & 1; + return dev->byte_buf & 1; } #endif @@ -887,32 +893,52 @@ static u8 ds9490r_read_byte(void *data) { struct ds_device *dev = data; int err; - u8 byte = 0; - err = ds_read_byte(dev, &byte); + err = ds_read_byte(dev, &dev->byte_buf); if (err) return 0; - return byte; + return dev->byte_buf; } static void ds9490r_write_block(void *data, const u8 *buf, int len) { struct ds_device *dev = data; + u8 *tbuf; - ds_write_block(dev, (u8 *)buf, len); + if (len <= 0) + return; + + tbuf = kmalloc(len, GFP_KERNEL); + if (!tbuf) + return; + + memcpy(tbuf, buf, len); + ds_write_block(dev, tbuf, len); + + kfree(tbuf); } static u8 ds9490r_read_block(void *data, u8 *buf, int len) { struct ds_device *dev = data; int err; + u8 *tbuf; - err = ds_read_block(dev, buf, len); - if (err < 0) + if (len <= 0) return 0; - return len; + tbuf = kmalloc(len, GFP_KERNEL); + if (!tbuf) + return 0; + + err = ds_read_block(dev, tbuf, len); + if (err >= 0) + memcpy(buf, tbuf, len); + + kfree(tbuf); + + return err >= 0 ? len : 0; } static u8 ds9490r_reset(void *data) From 5413f432a1f33293b9183fadb1b44e29b168aa24 Mon Sep 17 00:00:00 2001 From: Alexandre Bailon Date: Wed, 1 Feb 2017 21:30:12 -0600 Subject: [PATCH 100/154] usb: musb: da8xx: Remove CPPI 3.0 quirk and methods commit a994ce2d7e66008381a0b184c73be9ae9b72eb5c upstream. DA8xx driver is registering and using the CPPI 3.0 DMA controller but actually, the DA8xx has a CPPI 4.1 DMA controller. Remove the CPPI 3.0 quirk and methods. Fixes: f8e9f34f80a2 ("usb: musb: Fix up DMA related macros") Fixes: 7f6283ed6fe8 ("usb: musb: Set up function pointers for DMA") Signed-off-by: Alexandre Bailon Acked-by: Sergei Shtylyov Acked-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/da8xx.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c index 2440f88e07a3..bacee0fd4dd3 100644 --- a/drivers/usb/musb/da8xx.c +++ b/drivers/usb/musb/da8xx.c @@ -434,15 +434,11 @@ static int da8xx_musb_exit(struct musb *musb) } static const struct musb_platform_ops da8xx_ops = { - .quirks = MUSB_DMA_CPPI | MUSB_INDEXED_EP, + .quirks = MUSB_INDEXED_EP, .init = da8xx_musb_init, .exit = da8xx_musb_exit, .fifo_mode = 2, -#ifdef CONFIG_USB_TI_CPPI_DMA - .dma_init = cppi_dma_controller_create, - .dma_exit = cppi_dma_controller_destroy, -#endif .enable = da8xx_musb_enable, .disable = da8xx_musb_disable, From 3ccf60e1ff5e6cbd47b860b95f6cb53ebd98113b Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 19 Jan 2017 13:38:42 +0200 Subject: [PATCH 101/154] usb: dwc3: gadget: skip Set/Clear Halt when invalid commit ffb80fc672c3a7b6afd0cefcb1524fb99917b2f3 upstream. At least macOS seems to be sending ClearFeature(ENDPOINT_HALT) to endpoints which aren't Halted. This makes DWC3's CLEARSTALL command time out which causes several issues for the driver. Instead, let's just return 0 and bail out early. Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index d2b860ebfe13..5dc6bfc91f4b 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1234,6 +1234,9 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol) unsigned transfer_in_flight; unsigned started; + if (dep->flags & DWC3_EP_STALL) + return 0; + if (dep->number > 1) trb = dwc3_ep_prev_trb(dep, dep->trb_enqueue); else @@ -1258,6 +1261,8 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol) else dep->flags |= DWC3_EP_STALL; } else { + if (!(dep->flags & DWC3_EP_STALL)) + return 0; ret = dwc3_send_clear_stall_ep_cmd(dep); if (ret) From 82bd560d9552a7b30ad1123591de63684fe0a63e Mon Sep 17 00:00:00 2001 From: William wu Date: Tue, 17 Jan 2017 15:32:07 +0800 Subject: [PATCH 102/154] usb: host: xhci: plat: check hcc_params after add hcd commit 5de4e1ea9a731cad195ce5152705c21daef3bbba upstream. The commit 4ac53087d6d4 ("usb: xhci: plat: Create both HCDs before adding them") move add hcd to the end of probe, this cause hcc_params uninitiated, because xHCI driver sets hcc_params in xhci_gen_setup() called from usb_add_hcd(). This patch checks the Maximum Primary Stream Array Size in the hcc_params register after add primary hcd. Signed-off-by: William wu Acked-by: Roger Quadros Fixes: 4ac53087d6d4 ("usb: xhci: plat: Create both HCDs before adding them") Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index ed56bf9ed885..abe360684f0b 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -223,9 +223,6 @@ static int xhci_plat_probe(struct platform_device *pdev) if (device_property_read_bool(&pdev->dev, "usb3-lpm-capable")) xhci->quirks |= XHCI_LPM_SUPPORT; - if (HCC_MAX_PSA(xhci->hcc_params) >= 4) - xhci->shared_hcd->can_do_streams = 1; - hcd->usb_phy = devm_usb_get_phy_by_phandle(&pdev->dev, "usb-phy", 0); if (IS_ERR(hcd->usb_phy)) { ret = PTR_ERR(hcd->usb_phy); @@ -242,6 +239,9 @@ static int xhci_plat_probe(struct platform_device *pdev) if (ret) goto disable_usb_phy; + if (HCC_MAX_PSA(xhci->hcc_params) >= 4) + xhci->shared_hcd->can_do_streams = 1; + ret = usb_add_hcd(xhci->shared_hcd, irq, IRQF_SHARED); if (ret) goto dealloc_usb2_hcd; From 881b5225558e2bad2f02aca4058b269dce0a834d Mon Sep 17 00:00:00 2001 From: Krzysztof Opasiak Date: Mon, 16 Jan 2017 08:40:57 +0100 Subject: [PATCH 103/154] usb: gadget: udc-core: Rescan pending list on driver unbind commit 8236800da115a3e24b9165c573067343f51cf5ea upstream. Since: commit 855ed04a3758 ("usb: gadget: udc-core: independent registration of gadgets and gadget drivers") if we load gadget module but there is no free udc available then it will be stored on a pending gadgets list. $ modprobe g_zero.ko $ modprobe g_ether.ko [] udc-core: couldn't find an available UDC - added [g_ether] to list of pending drivers We scan this list each time when new UDC appears in system. But we can get a free UDC each time after gadget unbind. This commit add scanning of that list directly after unbinding gadget from udc. Thanks to this, when we unload first gadget: $ rmmod g_zero.ko gadget which is pending is automatically attached to that UDC (if name matches). Fixes: 855ed04a3758 ("usb: gadget: udc-core: independent registration of gadgets and gadget drivers") Signed-off-by: Krzysztof Opasiak Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/core.c | 45 ++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 0402177f93cd..d685d82dcf48 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -1080,6 +1080,24 @@ static void usb_udc_nop_release(struct device *dev) dev_vdbg(dev, "%s\n", __func__); } +/* should be called with udc_lock held */ +static int check_pending_gadget_drivers(struct usb_udc *udc) +{ + struct usb_gadget_driver *driver; + int ret = 0; + + list_for_each_entry(driver, &gadget_driver_pending_list, pending) + if (!driver->udc_name || strcmp(driver->udc_name, + dev_name(&udc->dev)) == 0) { + ret = udc_bind_to_driver(udc, driver); + if (ret != -EPROBE_DEFER) + list_del(&driver->pending); + break; + } + + return ret; +} + /** * usb_add_gadget_udc_release - adds a new gadget to the udc class driver list * @parent: the parent device to this udc. Usually the controller driver's @@ -1093,7 +1111,6 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget, void (*release)(struct device *dev)) { struct usb_udc *udc; - struct usb_gadget_driver *driver; int ret = -ENOMEM; udc = kzalloc(sizeof(*udc), GFP_KERNEL); @@ -1136,17 +1153,9 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget, udc->vbus = true; /* pick up one of pending gadget drivers */ - list_for_each_entry(driver, &gadget_driver_pending_list, pending) { - if (!driver->udc_name || strcmp(driver->udc_name, - dev_name(&udc->dev)) == 0) { - ret = udc_bind_to_driver(udc, driver); - if (ret != -EPROBE_DEFER) - list_del(&driver->pending); - if (ret) - goto err5; - break; - } - } + ret = check_pending_gadget_drivers(udc); + if (ret) + goto err5; mutex_unlock(&udc_lock); @@ -1356,14 +1365,22 @@ int usb_gadget_unregister_driver(struct usb_gadget_driver *driver) return -EINVAL; mutex_lock(&udc_lock); - list_for_each_entry(udc, &udc_list, list) + list_for_each_entry(udc, &udc_list, list) { if (udc->driver == driver) { usb_gadget_remove_driver(udc); usb_gadget_set_state(udc->gadget, - USB_STATE_NOTATTACHED); + USB_STATE_NOTATTACHED); + + /* Maybe there is someone waiting for this UDC? */ + check_pending_gadget_drivers(udc); + /* + * For now we ignore bind errors as probably it's + * not a valid reason to fail other's gadget unbind + */ ret = 0; break; } + } if (ret) { list_del(&driver->pending); From 92ee9483bc5fe7d4c2e6730515849467d818f8f7 Mon Sep 17 00:00:00 2001 From: Magnus Lilja Date: Wed, 25 Jan 2017 22:07:59 +0100 Subject: [PATCH 104/154] usb: gadget: udc: fsl: Add missing complete function. commit 5528954a1a0c49c6974ef1b8d6eaceff536204d5 upstream. Commit 304f7e5e1d08 ("usb: gadget: Refactor request completion") removed check if req->req.complete is non-NULL, resulting in a NULL pointer derefence and a kernel panic. This patch adds an empty complete function instead of re-introducing the req->req.complete check. Fixes: 304f7e5e1d08 ("usb: gadget: Refactor request completion") Signed-off-by: Magnus Lilja Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/fsl_udc_core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c index aab5221d6c2e..aac0ce8aeb0b 100644 --- a/drivers/usb/gadget/udc/fsl_udc_core.c +++ b/drivers/usb/gadget/udc/fsl_udc_core.c @@ -1249,6 +1249,12 @@ static const struct usb_gadget_ops fsl_gadget_ops = { .udc_stop = fsl_udc_stop, }; +/* + * Empty complete function used by this driver to fill in the req->complete + * field when creating a request since the complete field is mandatory. + */ +static void fsl_noop_complete(struct usb_ep *ep, struct usb_request *req) { } + /* Set protocol stall on ep0, protocol stall will automatically be cleared on new transaction */ static void ep0stall(struct fsl_udc *udc) @@ -1283,7 +1289,7 @@ static int ep0_prime_status(struct fsl_udc *udc, int direction) req->req.length = 0; req->req.status = -EINPROGRESS; req->req.actual = 0; - req->req.complete = NULL; + req->req.complete = fsl_noop_complete; req->dtd_count = 0; ret = usb_gadget_map_request(&ep->udc->gadget, &req->req, ep_is_in(ep)); @@ -1366,7 +1372,7 @@ static void ch9getstatus(struct fsl_udc *udc, u8 request_type, u16 value, req->req.length = 2; req->req.status = -EINPROGRESS; req->req.actual = 0; - req->req.complete = NULL; + req->req.complete = fsl_noop_complete; req->dtd_count = 0; ret = usb_gadget_map_request(&ep->udc->gadget, &req->req, ep_is_in(ep)); From b6092a57150c1641dffb6bfeebbe2cbde7275d1f Mon Sep 17 00:00:00 2001 From: Krzysztof Opasiak Date: Thu, 19 Jan 2017 18:55:27 +0100 Subject: [PATCH 105/154] usb: gadget: f_hid: fix: Free out requests commit 20d2ca955bd09639c7b01db5761d157c297aea0a upstream. Requests for out endpoint are allocated in bind() function but never released. This commit ensures that all pending requests are released when we disable out endpoint. Fixes: 99c515005857 ("usb: gadget: hidg: register OUT INT endpoint for SET_REPORT") Tested-by: David Lechner Signed-off-by: Krzysztof Opasiak Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_hid.c | 36 +++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index e2966f87c860..ceb1d0cc5c90 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -371,20 +371,36 @@ static inline struct usb_request *hidg_alloc_ep_req(struct usb_ep *ep, static void hidg_set_report_complete(struct usb_ep *ep, struct usb_request *req) { struct f_hidg *hidg = (struct f_hidg *) req->context; + struct usb_composite_dev *cdev = hidg->func.config->cdev; struct f_hidg_req_list *req_list; unsigned long flags; - req_list = kzalloc(sizeof(*req_list), GFP_ATOMIC); - if (!req_list) + switch (req->status) { + case 0: + req_list = kzalloc(sizeof(*req_list), GFP_ATOMIC); + if (!req_list) { + ERROR(cdev, "Unable to allocate mem for req_list\n"); + goto free_req; + } + + req_list->req = req; + + spin_lock_irqsave(&hidg->spinlock, flags); + list_add_tail(&req_list->list, &hidg->completed_out_req); + spin_unlock_irqrestore(&hidg->spinlock, flags); + + wake_up(&hidg->read_queue); + break; + default: + ERROR(cdev, "Set report failed %d\n", req->status); + /* FALLTHROUGH */ + case -ECONNABORTED: /* hardware forced ep reset */ + case -ECONNRESET: /* request dequeued */ + case -ESHUTDOWN: /* disconnect from host */ +free_req: + free_ep_req(ep, req); return; - - req_list->req = req; - - spin_lock_irqsave(&hidg->spinlock, flags); - list_add_tail(&req_list->list, &hidg->completed_out_req); - spin_unlock_irqrestore(&hidg->spinlock, flags); - - wake_up(&hidg->read_queue); + } } static int hidg_setup(struct usb_function *f, From d3acd94c0f79387b0d1855bdd4690f1596eb3f25 Mon Sep 17 00:00:00 2001 From: Krzysztof Opasiak Date: Thu, 19 Jan 2017 18:55:28 +0100 Subject: [PATCH 106/154] usb: gadget: f_hid: fix: Prevent accessing released memory commit aa65d11aa008f4de58a9cee7e121666d9d68505e upstream. When we unlock our spinlock to copy data to user we may get disabled by USB host and free the whole list of completed out requests including the one from which we are copying the data to user memory. To prevent from this let's remove our working element from the list and place it back only if there is sth left when we finish with it. Fixes: 99c515005857 ("usb: gadget: hidg: register OUT INT endpoint for SET_REPORT") Tested-by: David Lechner Signed-off-by: Krzysztof Opasiak Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_hid.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index ceb1d0cc5c90..b914e5725387 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -223,6 +223,13 @@ static ssize_t f_hidg_read(struct file *file, char __user *buffer, /* pick the first one */ list = list_first_entry(&hidg->completed_out_req, struct f_hidg_req_list, list); + + /* + * Remove this from list to protect it from beign free() + * while host disables our function + */ + list_del(&list->list); + req = list->req; count = min_t(unsigned int, count, req->actual - list->pos); spin_unlock_irqrestore(&hidg->spinlock, flags); @@ -238,15 +245,20 @@ static ssize_t f_hidg_read(struct file *file, char __user *buffer, * call, taking into account its current read position. */ if (list->pos == req->actual) { - spin_lock_irqsave(&hidg->spinlock, flags); - list_del(&list->list); kfree(list); - spin_unlock_irqrestore(&hidg->spinlock, flags); req->length = hidg->report_length; ret = usb_ep_queue(hidg->out_ep, req, GFP_KERNEL); - if (ret < 0) + if (ret < 0) { + free_ep_req(hidg->out_ep, req); return ret; + } + } else { + spin_lock_irqsave(&hidg->spinlock, flags); + list_add(&list->list, &hidg->completed_out_req); + spin_unlock_irqrestore(&hidg->spinlock, flags); + + wake_up(&hidg->read_queue); } return count; @@ -506,14 +518,18 @@ static void hidg_disable(struct usb_function *f) { struct f_hidg *hidg = func_to_hidg(f); struct f_hidg_req_list *list, *next; + unsigned long flags; usb_ep_disable(hidg->in_ep); usb_ep_disable(hidg->out_ep); + spin_lock_irqsave(&hidg->spinlock, flags); list_for_each_entry_safe(list, next, &hidg->completed_out_req, list) { + free_ep_req(hidg->out_ep, list->req); list_del(&list->list); kfree(list); } + spin_unlock_irqrestore(&hidg->spinlock, flags); } static int hidg_set_alt(struct usb_function *f, unsigned intf, unsigned alt) From 6d0511ed15db30965f8a3be8f0733bb6efd2b95e Mon Sep 17 00:00:00 2001 From: Krzysztof Opasiak Date: Thu, 19 Jan 2017 18:55:29 +0100 Subject: [PATCH 107/154] usb: gadget: f_hid: Use spinlock instead of mutex commit 33e4c1a9987a1fc3b42c3b534100b5b006d55c61 upstream. As IN request has to be allocated in set_alt() and released in disable() we cannot use mutex to protect it as we cannot sleep in those funcitons. Let's replace this mutex with a spinlock. Tested-by: David Lechner Signed-off-by: Krzysztof Opasiak Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_hid.c | 57 +++++++++++++++++------------ 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index b914e5725387..b0f71957d00b 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -50,12 +50,12 @@ struct f_hidg { /* recv report */ struct list_head completed_out_req; - spinlock_t spinlock; + spinlock_t read_spinlock; wait_queue_head_t read_queue; unsigned int qlen; /* send report */ - struct mutex lock; + spinlock_t write_spinlock; bool write_pending; wait_queue_head_t write_queue; struct usb_request *req; @@ -204,20 +204,20 @@ static ssize_t f_hidg_read(struct file *file, char __user *buffer, if (!access_ok(VERIFY_WRITE, buffer, count)) return -EFAULT; - spin_lock_irqsave(&hidg->spinlock, flags); + spin_lock_irqsave(&hidg->read_spinlock, flags); #define READ_COND (!list_empty(&hidg->completed_out_req)) /* wait for at least one buffer to complete */ while (!READ_COND) { - spin_unlock_irqrestore(&hidg->spinlock, flags); + spin_unlock_irqrestore(&hidg->read_spinlock, flags); if (file->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(hidg->read_queue, READ_COND)) return -ERESTARTSYS; - spin_lock_irqsave(&hidg->spinlock, flags); + spin_lock_irqsave(&hidg->read_spinlock, flags); } /* pick the first one */ @@ -232,7 +232,7 @@ static ssize_t f_hidg_read(struct file *file, char __user *buffer, req = list->req; count = min_t(unsigned int, count, req->actual - list->pos); - spin_unlock_irqrestore(&hidg->spinlock, flags); + spin_unlock_irqrestore(&hidg->read_spinlock, flags); /* copy to user outside spinlock */ count -= copy_to_user(buffer, req->buf + list->pos, count); @@ -254,9 +254,9 @@ static ssize_t f_hidg_read(struct file *file, char __user *buffer, return ret; } } else { - spin_lock_irqsave(&hidg->spinlock, flags); + spin_lock_irqsave(&hidg->read_spinlock, flags); list_add(&list->list, &hidg->completed_out_req); - spin_unlock_irqrestore(&hidg->spinlock, flags); + spin_unlock_irqrestore(&hidg->read_spinlock, flags); wake_up(&hidg->read_queue); } @@ -267,13 +267,16 @@ static ssize_t f_hidg_read(struct file *file, char __user *buffer, static void f_hidg_req_complete(struct usb_ep *ep, struct usb_request *req) { struct f_hidg *hidg = (struct f_hidg *)ep->driver_data; + unsigned long flags; if (req->status != 0) { ERROR(hidg->func.config->cdev, "End Point Request ERROR: %d\n", req->status); } + spin_lock_irqsave(&hidg->write_spinlock, flags); hidg->write_pending = 0; + spin_unlock_irqrestore(&hidg->write_spinlock, flags); wake_up(&hidg->write_queue); } @@ -281,18 +284,19 @@ static ssize_t f_hidg_write(struct file *file, const char __user *buffer, size_t count, loff_t *offp) { struct f_hidg *hidg = file->private_data; + unsigned long flags; ssize_t status = -ENOMEM; if (!access_ok(VERIFY_READ, buffer, count)) return -EFAULT; - mutex_lock(&hidg->lock); + spin_lock_irqsave(&hidg->write_spinlock, flags); #define WRITE_COND (!hidg->write_pending) /* write queue */ while (!WRITE_COND) { - mutex_unlock(&hidg->lock); + spin_unlock_irqrestore(&hidg->write_spinlock, flags); if (file->f_flags & O_NONBLOCK) return -EAGAIN; @@ -300,17 +304,20 @@ static ssize_t f_hidg_write(struct file *file, const char __user *buffer, hidg->write_queue, WRITE_COND)) return -ERESTARTSYS; - mutex_lock(&hidg->lock); + spin_lock_irqsave(&hidg->write_spinlock, flags); } + hidg->write_pending = 1; count = min_t(unsigned, count, hidg->report_length); + + spin_unlock_irqrestore(&hidg->write_spinlock, flags); status = copy_from_user(hidg->req->buf, buffer, count); if (status != 0) { ERROR(hidg->func.config->cdev, "copy_from_user error\n"); - mutex_unlock(&hidg->lock); - return -EINVAL; + status = -EINVAL; + goto release_write_pending; } hidg->req->status = 0; @@ -318,19 +325,23 @@ static ssize_t f_hidg_write(struct file *file, const char __user *buffer, hidg->req->length = count; hidg->req->complete = f_hidg_req_complete; hidg->req->context = hidg; - hidg->write_pending = 1; status = usb_ep_queue(hidg->in_ep, hidg->req, GFP_ATOMIC); if (status < 0) { ERROR(hidg->func.config->cdev, "usb_ep_queue error on int endpoint %zd\n", status); - hidg->write_pending = 0; - wake_up(&hidg->write_queue); + goto release_write_pending; } else { status = count; } - mutex_unlock(&hidg->lock); + return status; +release_write_pending: + spin_lock_irqsave(&hidg->write_spinlock, flags); + hidg->write_pending = 0; + spin_unlock_irqrestore(&hidg->write_spinlock, flags); + + wake_up(&hidg->write_queue); return status; } @@ -397,9 +408,9 @@ static void hidg_set_report_complete(struct usb_ep *ep, struct usb_request *req) req_list->req = req; - spin_lock_irqsave(&hidg->spinlock, flags); + spin_lock_irqsave(&hidg->read_spinlock, flags); list_add_tail(&req_list->list, &hidg->completed_out_req); - spin_unlock_irqrestore(&hidg->spinlock, flags); + spin_unlock_irqrestore(&hidg->read_spinlock, flags); wake_up(&hidg->read_queue); break; @@ -523,13 +534,13 @@ static void hidg_disable(struct usb_function *f) usb_ep_disable(hidg->in_ep); usb_ep_disable(hidg->out_ep); - spin_lock_irqsave(&hidg->spinlock, flags); + spin_lock_irqsave(&hidg->read_spinlock, flags); list_for_each_entry_safe(list, next, &hidg->completed_out_req, list) { free_ep_req(hidg->out_ep, list->req); list_del(&list->list); kfree(list); } - spin_unlock_irqrestore(&hidg->spinlock, flags); + spin_unlock_irqrestore(&hidg->read_spinlock, flags); } static int hidg_set_alt(struct usb_function *f, unsigned intf, unsigned alt) @@ -678,8 +689,8 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f) if (status) goto fail; - mutex_init(&hidg->lock); - spin_lock_init(&hidg->spinlock); + spin_lock_init(&hidg->write_spinlock); + spin_lock_init(&hidg->read_spinlock); init_waitqueue_head(&hidg->write_queue); init_waitqueue_head(&hidg->read_queue); INIT_LIST_HEAD(&hidg->completed_out_req); From 879dc37cc3a81c99527410a83ae9cc35c637e8b1 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 7 Dec 2016 01:16:25 -0800 Subject: [PATCH 108/154] hv: allocate synic pages for all present CPUs commit 421b8f20d3c381b215f988b42428f56fc3b82405 upstream. It may happen that not all CPUs are online when we do hv_synic_alloc() and in case more CPUs come online later we may try accessing these allocated structures. Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 60dbd6cb4640..e4bb498e41ae 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -411,7 +411,7 @@ int hv_synic_alloc(void) goto err; } - for_each_online_cpu(cpu) { + for_each_present_cpu(cpu) { hv_context.event_dpc[cpu] = kmalloc(size, GFP_ATOMIC); if (hv_context.event_dpc[cpu] == NULL) { pr_err("Unable to allocate event dpc\n"); @@ -482,7 +482,7 @@ void hv_synic_free(void) int cpu; kfree(hv_context.hv_numa_map); - for_each_online_cpu(cpu) + for_each_present_cpu(cpu) hv_synic_free_cpu(cpu); } From 6e936b06795ae5dcb5248e0c4344d8303de4c2db Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 7 Dec 2016 01:16:26 -0800 Subject: [PATCH 109/154] hv: init percpu_list in hv_synic_alloc() commit 3c7630d35009e6635e5b58d62de554fd5b6db5df upstream. Initializing hv_context.percpu_list in hv_synic_alloc() helps to prevent a crash in percpu_channel_enq() when not all CPUs were online during initialization and it naturally belongs there. Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index e4bb498e41ae..a2567a4be1a8 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -457,6 +457,8 @@ int hv_synic_alloc(void) pr_err("Unable to allocate post msg page\n"); goto err; } + + INIT_LIST_HEAD(&hv_context.percpu_list[cpu]); } return 0; @@ -552,8 +554,6 @@ void hv_synic_init(void *arg) rdmsrl(HV_X64_MSR_VP_INDEX, vp_index); hv_context.vp_index[cpu] = (u32)vp_index; - INIT_LIST_HEAD(&hv_context.percpu_list[cpu]); - /* * Register the per-cpu clockevent source. */ From 664f72a17f2c072175a0083388f315f811ff1acb Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 7 Dec 2016 01:16:27 -0800 Subject: [PATCH 110/154] hv: don't reset hv_context.tsc_page on crash commit 56ef6718a1d8d77745033c5291e025ce18504159 upstream. It may happen that secondary CPUs are still alive and resetting hv_context.tsc_page will cause a consequent crash in read_hv_clock_tsc() as we don't check for it being not NULL there. It is safe as we're not freeing this page anyways. Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index a2567a4be1a8..6e49a4dd99c0 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -309,9 +309,10 @@ void hv_cleanup(bool crash) hypercall_msr.as_uint64 = 0; wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64); - if (!crash) + if (!crash) { vfree(hv_context.tsc_page); - hv_context.tsc_page = NULL; + hv_context.tsc_page = NULL; + } } #endif } From e44eab51e74c59a0b01c1fce2159c82b3c8a31b0 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 7 Dec 2016 01:16:28 -0800 Subject: [PATCH 111/154] Drivers: hv: vmbus: Prevent sending data on a rescinded channel commit e7e97dd8b77ee7366f2f8c70a033bf5fa05ec2e0 upstream. After the channel is rescinded, the host does not read from the rescinded channel. Fail writes to a channel that has already been rescinded. If we permit writes on a rescinded channel, since the host will not respond we will have situations where we will be unable to unload vmbus drivers that cannot have any outstanding requests to the host at the point they are unoaded. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/ring_buffer.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 308dbda700eb..e94ed1c22c8b 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -298,6 +298,9 @@ int hv_ringbuffer_write(struct vmbus_channel *channel, unsigned long flags = 0; struct hv_ring_buffer_info *outring_info = &channel->outbound; + if (channel->rescind) + return -ENODEV; + for (i = 0; i < kv_count; i++) totalbytes_towrite += kv_list[i].iov_len; @@ -350,6 +353,10 @@ int hv_ringbuffer_write(struct vmbus_channel *channel, spin_unlock_irqrestore(&outring_info->ring_lock, flags); hv_signal_on_write(old_write, channel, kick_q); + + if (channel->rescind) + return -ENODEV; + return 0; } From 728fe696fd3ee551c24e02e7826dd9bdb89f1d47 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 22 Dec 2016 16:54:00 -0800 Subject: [PATCH 112/154] Drivers: hv: vmbus: Fix a rescind handling bug commit ccb61f8a99e6c29df4fb96a65dad4fad740d5be9 upstream. The host can rescind a channel that has been offered to the guest and once the channel is rescinded, the host does not respond to any requests on that channel. Deal with the case where the guest may be blocked waiting for a response from the host. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 18 ++++++++++++++++++ drivers/hv/channel_mgmt.c | 25 +++++++++++++++++++++++++ include/linux/hyperv.h | 1 + 3 files changed, 44 insertions(+) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index d5b8d9fd50bb..be34547cdb68 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -157,6 +157,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, } init_completion(&open_info->waitevent); + open_info->waiting_channel = newchannel; open_msg = (struct vmbus_channel_open_channel *)open_info->msg; open_msg->header.msgtype = CHANNELMSG_OPENCHANNEL; @@ -194,6 +195,11 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, list_del(&open_info->msglistentry); spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); + if (newchannel->rescind) { + err = -ENODEV; + goto error_free_gpadl; + } + if (open_info->response.open_result.status) { err = -EAGAIN; goto error_free_gpadl; @@ -405,6 +411,7 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, return ret; init_completion(&msginfo->waitevent); + msginfo->waiting_channel = channel; gpadlmsg = (struct vmbus_channel_gpadl_header *)msginfo->msg; gpadlmsg->header.msgtype = CHANNELMSG_GPADL_HEADER; @@ -441,6 +448,11 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, } wait_for_completion(&msginfo->waitevent); + if (channel->rescind) { + ret = -ENODEV; + goto cleanup; + } + /* At this point, we received the gpadl created msg */ *gpadl_handle = gpadlmsg->gpadl; @@ -474,6 +486,7 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle) return -ENOMEM; init_completion(&info->waitevent); + info->waiting_channel = channel; msg = (struct vmbus_channel_gpadl_teardown *)info->msg; @@ -493,6 +506,11 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle) wait_for_completion(&info->waitevent); + if (channel->rescind) { + ret = -ENODEV; + goto post_msg_err; + } + post_msg_err: spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); list_del(&info->msglistentry); diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index f3a8b52acb51..cb9531541a12 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -147,6 +147,29 @@ static const struct { { HV_RDV_GUID }, }; +/* + * The rescinded channel may be blocked waiting for a response from the host; + * take care of that. + */ +static void vmbus_rescind_cleanup(struct vmbus_channel *channel) +{ + struct vmbus_channel_msginfo *msginfo; + unsigned long flags; + + + spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); + + list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, + msglistentry) { + + if (msginfo->waiting_channel == channel) { + complete(&msginfo->waitevent); + break; + } + } + spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); +} + static bool is_unsupported_vmbus_devs(const uuid_le *guid) { int i; @@ -825,6 +848,8 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) channel->rescind = true; spin_unlock_irqrestore(&channel->lock, flags); + vmbus_rescind_cleanup(channel); + if (channel->device_obj) { if (channel->chn_rescind_callback) { channel->chn_rescind_callback(channel); diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index c92a083bcf16..192eef2fd766 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -641,6 +641,7 @@ struct vmbus_channel_msginfo { /* Synchronize the request/response if needed */ struct completion waitevent; + struct vmbus_channel *waiting_channel; union { struct vmbus_channel_version_supported version_supported; struct vmbus_channel_open_result open_result; From d4d5b507c6427b417f4ff5411649e55af4447c84 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 22 Dec 2016 16:54:01 -0800 Subject: [PATCH 113/154] Drivers: hv: util: kvp: Fix a rescind processing issue commit 5a66fecbf6aa528e375cbebccb1061cc58d80c84 upstream. KVP may use a char device to support the communication between the user level daemon and the driver. When the KVP channel is rescinded we need to make sure that the char device is fully cleaned up before we can process a new KVP offer from the host. Implement this logic. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_kvp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index 5e1fdc8d32ab..3abfc5983c97 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c @@ -88,6 +88,7 @@ static DECLARE_WORK(kvp_sendkey_work, kvp_send_key); static const char kvp_devname[] = "vmbus/hv_kvp"; static u8 *recv_buffer; static struct hvutil_transport *hvt; +static struct completion release_event; /* * Register the kernel component with the user-level daemon. * As part of this registration, pass the LIC version number. @@ -716,6 +717,7 @@ static void kvp_on_reset(void) if (cancel_delayed_work_sync(&kvp_timeout_work)) kvp_respond_to_host(NULL, HV_E_FAIL); kvp_transaction.state = HVUTIL_DEVICE_INIT; + complete(&release_event); } int @@ -724,6 +726,7 @@ hv_kvp_init(struct hv_util_service *srv) recv_buffer = srv->recv_buffer; kvp_transaction.recv_channel = srv->channel; + init_completion(&release_event); /* * When this driver loads, the user level daemon that * processes the host requests may not yet be running. @@ -747,4 +750,5 @@ void hv_kvp_deinit(void) cancel_delayed_work_sync(&kvp_timeout_work); cancel_work_sync(&kvp_sendkey_work); hvutil_transport_destroy(hvt); + wait_for_completion(&release_event); } From 4db47d9bf99d8bc3acaa9fd4b6437ea856a2f037 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 22 Dec 2016 16:54:02 -0800 Subject: [PATCH 114/154] Drivers: hv: util: Fcopy: Fix a rescind processing issue commit 20951c7535b5e6af46bc37b7142105f716df739c upstream. Fcopy may use a char device to support the communication between the user level daemon and the driver. When the Fcopy channel is rescinded we need to make sure that the char device is fully cleaned up before we can process a new Fcopy offer from the host. Implement this logic. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_fcopy.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c index 8b2ba98831ec..e47d8c9db03a 100644 --- a/drivers/hv/hv_fcopy.c +++ b/drivers/hv/hv_fcopy.c @@ -61,6 +61,7 @@ static DECLARE_WORK(fcopy_send_work, fcopy_send_data); static const char fcopy_devname[] = "vmbus/hv_fcopy"; static u8 *recv_buffer; static struct hvutil_transport *hvt; +static struct completion release_event; /* * This state maintains the version number registered by the daemon. */ @@ -317,6 +318,7 @@ static void fcopy_on_reset(void) if (cancel_delayed_work_sync(&fcopy_timeout_work)) fcopy_respond_to_host(HV_E_FAIL); + complete(&release_event); } int hv_fcopy_init(struct hv_util_service *srv) @@ -324,6 +326,7 @@ int hv_fcopy_init(struct hv_util_service *srv) recv_buffer = srv->recv_buffer; fcopy_transaction.recv_channel = srv->channel; + init_completion(&release_event); /* * When this driver loads, the user level daemon that * processes the host requests may not yet be running. @@ -345,4 +348,5 @@ void hv_fcopy_deinit(void) fcopy_transaction.state = HVUTIL_DEVICE_DYING; cancel_delayed_work_sync(&fcopy_timeout_work); hvutil_transport_destroy(hvt); + wait_for_completion(&release_event); } From fbff994b8a464cd5de50fbb2e0b8341becb70336 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 22 Dec 2016 16:54:03 -0800 Subject: [PATCH 115/154] Drivers: hv: util: Backup: Fix a rescind processing issue commit d77044d142e960f7b5f814a91ecb8bcf86aa552c upstream. VSS may use a char device to support the communication between the user level daemon and the driver. When the VSS channel is rescinded we need to make sure that the char device is fully cleaned up before we can process a new VSS offer from the host. Implement this logic. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_snapshot.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c index a6707133c297..a76e3db0d01f 100644 --- a/drivers/hv/hv_snapshot.c +++ b/drivers/hv/hv_snapshot.c @@ -66,6 +66,7 @@ static int dm_reg_value; static const char vss_devname[] = "vmbus/hv_vss"; static __u8 *recv_buffer; static struct hvutil_transport *hvt; +static struct completion release_event; static void vss_timeout_func(struct work_struct *dummy); static void vss_handle_request(struct work_struct *dummy); @@ -330,11 +331,13 @@ static void vss_on_reset(void) if (cancel_delayed_work_sync(&vss_timeout_work)) vss_respond_to_host(HV_E_FAIL); vss_transaction.state = HVUTIL_DEVICE_INIT; + complete(&release_event); } int hv_vss_init(struct hv_util_service *srv) { + init_completion(&release_event); if (vmbus_proto_version < VERSION_WIN8_1) { pr_warn("Integration service 'Backup (volume snapshot)'" " not supported on this host version.\n"); @@ -365,4 +368,5 @@ void hv_vss_deinit(void) cancel_delayed_work_sync(&vss_timeout_work); cancel_work_sync(&vss_handle_request_work); hvutil_transport_destroy(hvt); + wait_for_completion(&release_event); } From 50fc62d5eeb3392f11161637b5ed4e0d340fed74 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 22 Dec 2016 18:07:52 -0700 Subject: [PATCH 116/154] RDMA/core: Fix incorrect structure packing for booleans commit 55efcfcd7776165b294f8b5cd6e05ca00ec89b7c upstream. The RDMA core uses ib_pack() to convert from unpacked CPU structs to on-the-wire bitpacked structs. This process requires that 1 bit fields are declared as u8 in the unpacked struct, otherwise the packing process does not read the value properly and the packed result is wired to 0. Several places wrongly used int. Crucially this means the kernel has never, set reversible correctly in the path record request. It has always asked for irreversible paths even if the ULP requests otherwise. When the kernel is used with a SM that supports this feature, it completely breaks communication management if reversible paths are not properly requested. The only reason this ever worked is because opensm ignores the reversible bit. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Gunthorpe Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- include/rdma/ib_sa.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 5ee7aab95eb8..fd0e53219f93 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -153,12 +153,12 @@ struct ib_sa_path_rec { union ib_gid sgid; __be16 dlid; __be16 slid; - int raw_traffic; + u8 raw_traffic; /* reserved */ __be32 flow_label; u8 hop_limit; u8 traffic_class; - int reversible; + u8 reversible; u8 numb_path; __be16 pkey; __be16 qos_class; @@ -220,7 +220,7 @@ struct ib_sa_mcmember_rec { u8 hop_limit; u8 scope; u8 join_state; - int proxy_join; + u8 proxy_join; }; /* Service Record Component Mask Sec 15.2.5.14 Ver 1.1 */ From c8cdd9234caced510db1a03491af475938f9855a Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 21 Feb 2017 11:21:57 -0800 Subject: [PATCH 117/154] rdma_cm: fail iwarp accepts w/o connection params commit f2625f7db4dd0bbd16a9c7d2950e7621f9aa57ad upstream. cma_accept_iw() needs to return an error if conn_params is NULL. Since this is coming from user space, we can crash. Reported-by: Shaobo He Acked-by: Sean Hefty Signed-off-by: Steve Wise Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/cma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index c25768c2dd3b..f2d40c05ef9e 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -3540,6 +3540,9 @@ static int cma_accept_iw(struct rdma_id_private *id_priv, struct iw_cm_conn_param iw_param; int ret; + if (!conn_param) + return -EINVAL; + ret = cma_modify_qp_rtr(id_priv, conn_param); if (ret) return ret; From 4cf918c804c657c3a8b9cd4bb46171315bdfad62 Mon Sep 17 00:00:00 2001 From: Andrew Price Date: Wed, 22 Feb 2017 12:05:03 -0500 Subject: [PATCH 118/154] gfs2: Add missing rcu locking for glock lookup commit f38e5fb95a1f8feda88531eedc98f69b24748712 upstream. We must hold the rcu read lock across looking up glocks and trying to bump their refcount to prevent the glocks from being freed in between. Signed-off-by: Andrew Price Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/glock.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 14cbf60167a7..133f322573b5 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -658,9 +658,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, struct kmem_cache *cachep; int ret, tries = 0; + rcu_read_lock(); gl = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms); if (gl && !lockref_get_not_dead(&gl->gl_lockref)) gl = NULL; + rcu_read_unlock(); *glp = gl; if (gl) @@ -728,15 +730,18 @@ again: if (ret == -EEXIST) { ret = 0; + rcu_read_lock(); tmp = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms); if (tmp == NULL || !lockref_get_not_dead(&tmp->gl_lockref)) { if (++tries < 100) { + rcu_read_unlock(); cond_resched(); goto again; } tmp = NULL; ret = -ENOMEM; } + rcu_read_unlock(); } else { WARN_ON_ONCE(ret); } From 24d77f99a7b74b4b68682d54ee79992b8aed7421 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 27 Jan 2017 02:06:36 -0800 Subject: [PATCH 119/154] remoteproc: qcom: mdt_loader: Don't overwrite firmware object commit 3e8b571a9a0881ba3381ca0915995696da145ab8 upstream. The "fw" firmware object is passed from the remoteproc core and should not be overwritten, as that results in leaked buffers and a double free of the the last firmware object. Fixes: 051fb70fd4ea ("remoteproc: qcom: Driver for the self-authenticating Hexagon v5") Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- drivers/remoteproc/qcom_mdt_loader.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/remoteproc/qcom_mdt_loader.c b/drivers/remoteproc/qcom_mdt_loader.c index 114e8e4cef67..04db02d9059d 100644 --- a/drivers/remoteproc/qcom_mdt_loader.c +++ b/drivers/remoteproc/qcom_mdt_loader.c @@ -115,6 +115,7 @@ int qcom_mdt_load(struct rproc *rproc, const struct elf32_phdr *phdrs; const struct elf32_phdr *phdr; const struct elf32_hdr *ehdr; + const struct firmware *seg_fw; size_t fw_name_len; char *fw_name; void *ptr; @@ -153,16 +154,16 @@ int qcom_mdt_load(struct rproc *rproc, if (phdr->p_filesz) { sprintf(fw_name + fw_name_len - 3, "b%02d", i); - ret = request_firmware(&fw, fw_name, &rproc->dev); + ret = request_firmware(&seg_fw, fw_name, &rproc->dev); if (ret) { dev_err(&rproc->dev, "failed to load %s\n", fw_name); break; } - memcpy(ptr, fw->data, fw->size); + memcpy(ptr, seg_fw->data, seg_fw->size); - release_firmware(fw); + release_firmware(seg_fw); } if (phdr->p_memsz > phdr->p_filesz) From d56dd01bc291962f5fb4b53f6ed6de852af0a0e3 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Wed, 28 Dec 2016 15:40:04 -0600 Subject: [PATCH 120/154] rtlwifi: Fix alignment issues commit 40b368af4b750863b2cb66a3a9513241db2f0793 upstream. The addresses of Wlan NIC registers are natural alignment, but some drivers have bugs. These are evident on platforms that need natural alignment to access registers. This change contains the following: 1. Function _rtl8821ae_dbi_read() is used to read one byte from DBI, thus it should use rtl_read_byte(). 2. Register 0x4C7 of 8192ee is single byte. Signed-off-by: Ping-Ke Shih Signed-off-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c | 2 +- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c index ebf663e1a81a..cab4601eba8e 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c @@ -1006,7 +1006,7 @@ static void _rtl92ee_hw_configure(struct ieee80211_hw *hw) rtl_write_word(rtlpriv, REG_SIFS_TRX, 0x100a); /* Note Data sheet don't define */ - rtl_write_word(rtlpriv, 0x4C7, 0x80); + rtl_write_byte(rtlpriv, 0x4C7, 0x80); rtl_write_byte(rtlpriv, REG_RX_PKT_LIMIT, 0x20); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c index 1281ebe0c30a..2cbef9647acc 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c @@ -1128,7 +1128,7 @@ static u8 _rtl8821ae_dbi_read(struct rtl_priv *rtlpriv, u16 addr) } if (0 == tmp) { read_addr = REG_DBI_RDATA + addr % 4; - ret = rtl_read_word(rtlpriv, read_addr); + ret = rtl_read_byte(rtlpriv, read_addr); } return ret; } From 5ea52fac0d827452cd2887722580ab58fdbd2d7d Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sun, 5 Feb 2017 10:24:22 -0600 Subject: [PATCH 121/154] rtlwifi: rtl8192c-common: Fix "BUG: KASAN: commit 6773386f977ce5af339f9678fa2918909a946c6b upstream. Kernels built with CONFIG_KASAN=y report the following BUG for rtl8192cu and rtl8192c-common: ================================================================== BUG: KASAN: slab-out-of-bounds in rtl92c_dm_bt_coexist+0x858/0x1e40 [rtl8192c_common] at addr ffff8801c90edb08 Read of size 1 by task kworker/0:1/38 page:ffffea0007243800 count:1 mapcount:0 mapping: (null) index:0x0 compound_mapcount: 0 flags: 0x8000000000004000(head) page dumped because: kasan: bad access detected CPU: 0 PID: 38 Comm: kworker/0:1 Not tainted 4.9.7-gentoo #3 Hardware name: Gigabyte Technology Co., Ltd. To be filled by O.E.M./Z77-DS3H, BIOS F11a 11/13/2013 Workqueue: rtl92c_usb rtl_watchdog_wq_callback [rtlwifi] 0000000000000000 ffffffff829eea33 ffff8801d7f0fa30 ffff8801c90edb08 ffffffff824c0f09 ffff8801d4abee80 0000000000000004 0000000000000297 ffffffffc070b57c ffff8801c7aa7c48 ffff880100000004 ffffffff000003e8 Call Trace: [] ? dump_stack+0x5c/0x79 [] ? kasan_report_error+0x4b9/0x4e0 [] ? _usb_read_sync+0x15c/0x280 [rtl_usb] [] ? __asan_report_load1_noabort+0x45/0x50 [] ? rtl92c_dm_bt_coexist+0x858/0x1e40 [rtl8192c_common] [] ? rtl92c_dm_bt_coexist+0x858/0x1e40 [rtl8192c_common] [] ? rtl92c_dm_rf_saving+0x96e/0x1330 [rtl8192c_common] ... The problem is due to rtl8192ce and rtl8192cu sharing routines, and having different layouts of struct rtl_pci_priv, which is used by rtl8192ce, and struct rtl_usb_priv, which is used by rtl8192cu. The problem was resolved by placing the struct bt_coexist_info at the head of each of those private areas. Reported-and-tested-by: Dmitry Osipenko Signed-off-by: Larry Finger Cc: Dmitry Osipenko Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/pci.h | 4 ++-- drivers/net/wireless/realtek/rtlwifi/usb.h | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.h b/drivers/net/wireless/realtek/rtlwifi/pci.h index b951ebac15ea..d2f4dd470fdb 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.h +++ b/drivers/net/wireless/realtek/rtlwifi/pci.h @@ -275,10 +275,10 @@ struct mp_adapter { }; struct rtl_pci_priv { + struct bt_coexist_info bt_coexist; + struct rtl_led_ctl ledctl; struct rtl_pci dev; struct mp_adapter ndis_adapter; - struct rtl_led_ctl ledctl; - struct bt_coexist_info bt_coexist; }; #define rtl_pcipriv(hw) (((struct rtl_pci_priv *)(rtl_priv(hw))->priv)) diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.h b/drivers/net/wireless/realtek/rtlwifi/usb.h index 685273ca9561..441c4412130c 100644 --- a/drivers/net/wireless/realtek/rtlwifi/usb.h +++ b/drivers/net/wireless/realtek/rtlwifi/usb.h @@ -150,8 +150,9 @@ struct rtl_usb { }; struct rtl_usb_priv { - struct rtl_usb dev; + struct bt_coexist_info bt_coexist; struct rtl_led_ctl ledctl; + struct rtl_usb dev; }; #define rtl_usbpriv(hw) (((struct rtl_usb_priv *)(rtl_priv(hw))->priv)) From 5af94e637fd8b2b89ea49bfd112b37137fbecb0e Mon Sep 17 00:00:00 2001 From: Stefano Babic Date: Fri, 20 Jan 2017 10:38:20 -0500 Subject: [PATCH 122/154] VME: restore bus_remove function causing incomplete module unload commit 9797484ba83d68f18fe1cbd964b7cd830f78f0f7 upstream. Commit 050c3d52cc7810d9d17b8cd231708609af6876ae ("vme: make core vme support explicitly non-modular") dropped the remove function because it appeared as if it was for removal of the bus, which is not supported. However, vme_bus_remove() is called when a VME device is removed from the bus and not when the bus is removed; as it calls the VME device driver's cleanup function. Without this function, the remove() in the VME device driver is never called and VME device drivers cannot be reloaded again. Here we restore the remove function that was deleted in that commit, and the reference to the function in the bus structure. Fixes: 050c3d52cc78 ("vme: make core vme support explicitly non-modular") Cc: Manohar Vanga Acked-by: Martyn Welch Cc: devel@driverdev.osuosl.org Signed-off-by: Stefano Babic Signed-off-by: Paul Gortmaker Signed-off-by: Greg Kroah-Hartman --- drivers/vme/vme.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/vme/vme.c b/drivers/vme/vme.c index bdbadaa47ef3..0035cf79760a 100644 --- a/drivers/vme/vme.c +++ b/drivers/vme/vme.c @@ -1625,10 +1625,25 @@ static int vme_bus_probe(struct device *dev) return retval; } +static int vme_bus_remove(struct device *dev) +{ + int retval = -ENODEV; + struct vme_driver *driver; + struct vme_dev *vdev = dev_to_vme_dev(dev); + + driver = dev->platform_data; + + if (driver->remove != NULL) + retval = driver->remove(vdev); + + return retval; +} + struct bus_type vme_bus_type = { .name = "vme", .match = vme_bus_match, .probe = vme_bus_probe, + .remove = vme_bus_remove, }; EXPORT_SYMBOL(vme_bus_type); From 9bdd39c146fc4f7da1ceb91ac1ebe80e7f1a8d41 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Feb 2017 17:04:42 -0500 Subject: [PATCH 123/154] nfsd: minor nfsd_setattr cleanup commit 758e99fefe1d9230111296956335cd35995c0eaf upstream. Simplify exit paths, size_change use. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/vfs.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 8ca642fe9b21..186f426a3699 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -377,7 +377,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, __be32 err; int host_err; bool get_write_count; - int size_change = 0; + bool size_change = (iap->ia_valid & ATTR_SIZE); if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; @@ -390,11 +390,11 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, /* Get inode */ err = fh_verify(rqstp, fhp, ftype, accmode); if (err) - goto out; + return err; if (get_write_count) { host_err = fh_want_write(fhp); if (host_err) - return nfserrno(host_err); + goto out; } dentry = fhp->fh_dentry; @@ -405,19 +405,21 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, iap->ia_valid &= ~ATTR_MODE; if (!iap->ia_valid) - goto out; + return 0; nfsd_sanitize_attrs(inode, iap); + if (check_guard && guardtime != inode->i_ctime.tv_sec) + return nfserr_notsync; + /* * The size case is special, it changes the file in addition to the * attributes. */ - if (iap->ia_valid & ATTR_SIZE) { + if (size_change) { err = nfsd_get_write_access(rqstp, fhp, iap); if (err) - goto out; - size_change = 1; + return err; /* * RFC5661, Section 18.30.4: @@ -432,23 +434,16 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, iap->ia_valid |= ATTR_CTIME; - if (check_guard && guardtime != inode->i_ctime.tv_sec) { - err = nfserr_notsync; - goto out_put_write_access; - } - fh_lock(fhp); host_err = notify_change(dentry, iap, NULL); fh_unlock(fhp); - err = nfserrno(host_err); -out_put_write_access: if (size_change) put_write_access(inode); - if (!err) - err = nfserrno(commit_metadata(fhp)); out: - return err; + if (!host_err) + host_err = commit_metadata(fhp); + return nfserrno(host_err); } #if defined(CONFIG_NFSD_V4) From a3c6cbc4eac4473ed5461d5faae2794d3e5c0e44 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Feb 2017 07:21:33 +0100 Subject: [PATCH 124/154] nfsd: special case truncates some more commit 783112f7401ff449d979530209b3f6c2594fdb4e upstream. Both the NFS protocols and the Linux VFS use a setattr operation with a bitmap of attributes to set to set various file attributes including the file size and the uid/gid. The Linux syscalls never mix size updates with unrelated updates like the uid/gid, and some file systems like XFS and GFS2 rely on the fact that truncates don't update random other attributes, and many other file systems handle the case but do not update the other attributes in the same transaction. NFSD on the other hand passes the attributes it gets on the wire more or less directly through to the VFS, leading to updates the file systems don't expect. XFS at least has an assert on the allowed attributes, which caught an unusual NFS client setting the size and group at the same time. To handle this issue properly this splits the notify_change call in nfsd_setattr into two separate ones. Signed-off-by: Christoph Hellwig Tested-by: Chuck Lever Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/vfs.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 186f426a3699..b829cc9a9b39 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -414,13 +414,19 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, /* * The size case is special, it changes the file in addition to the - * attributes. + * attributes, and file systems don't expect it to be mixed with + * "random" attribute changes. We thus split out the size change + * into a separate call to ->setattr, and do the rest as a separate + * setattr call. */ if (size_change) { err = nfsd_get_write_access(rqstp, fhp, iap); if (err) return err; + } + fh_lock(fhp); + if (size_change) { /* * RFC5661, Section 18.30.4: * Changing the size of a file with SETATTR indirectly @@ -428,16 +434,30 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, * * (and similar for the older RFCs) */ - if (iap->ia_size != i_size_read(inode)) - iap->ia_valid |= ATTR_MTIME; + struct iattr size_attr = { + .ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME, + .ia_size = iap->ia_size, + }; + + host_err = notify_change(dentry, &size_attr, NULL); + if (host_err) + goto out_unlock; + iap->ia_valid &= ~ATTR_SIZE; + + /* + * Avoid the additional setattr call below if the only other + * attribute that the client sends is the mtime, as we update + * it as part of the size change above. + */ + if ((iap->ia_valid & ~ATTR_MTIME) == 0) + goto out_unlock; } iap->ia_valid |= ATTR_CTIME; - - fh_lock(fhp); host_err = notify_change(dentry, iap, NULL); - fh_unlock(fhp); +out_unlock: + fh_unlock(fhp); if (size_change) put_write_access(inode); out: From 0465339eb54953f0be1f03e980b07eeb01e16fca Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 8 Feb 2017 11:29:46 -0500 Subject: [PATCH 125/154] NFSv4: Fix memory and state leak in _nfs4_open_and_get_state commit a974deee477af89411e0f80456bfb344ac433c98 upstream. If we exit because the file access check failed, we currently leak the struct nfs4_state. We need to attach it to the open context before returning. Fixes: 3efb9722475e ("NFSv4: Refactor _nfs4_open_and_get_state..") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 78ff8b63d5f7..123105f4be53 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2708,6 +2708,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, ret = PTR_ERR(state); if (IS_ERR(state)) goto out; + ctx->state = state; if (server->caps & NFS_CAP_POSIX_LOCK) set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); if (opendata->o_res.rflags & NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK) @@ -2733,7 +2734,6 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, if (ret != 0) goto out; - ctx->state = state; if (d_inode(dentry) == state->inode) { nfs_inode_attach_open_context(ctx); if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) From 77bbc0c7712a43442e2637247eaa4f01f73f0eb8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 17 Feb 2017 18:42:32 -0500 Subject: [PATCH 126/154] NFSv4: Fix reboot recovery in copy offload commit 9d8cacbf5636657d2cd0dda17438a56d806d3224 upstream. Copy offload code needs to be hooked into the code for handling NFS4ERR_BAD_STATEID by ensuring that we set the "stateid" field in struct nfs4_exception. Reported-by: Olga Kornievskaia Fixes: 2e72448b07dc3 ("NFS: Add COPY nfs operation") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs42proc.c | 63 +++++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 608501971fe0..5cda392028ce 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -128,30 +128,26 @@ out_unlock: return err; } -static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src, +static ssize_t _nfs42_proc_copy(struct file *src, struct nfs_lock_context *src_lock, - struct file *dst, loff_t pos_dst, + struct file *dst, struct nfs_lock_context *dst_lock, - size_t count) + struct nfs42_copy_args *args, + struct nfs42_copy_res *res) { - struct nfs42_copy_args args = { - .src_fh = NFS_FH(file_inode(src)), - .src_pos = pos_src, - .dst_fh = NFS_FH(file_inode(dst)), - .dst_pos = pos_dst, - .count = count, - }; - struct nfs42_copy_res res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY], - .rpc_argp = &args, - .rpc_resp = &res, + .rpc_argp = args, + .rpc_resp = res, }; struct inode *dst_inode = file_inode(dst); struct nfs_server *server = NFS_SERVER(dst_inode); + loff_t pos_src = args->src_pos; + loff_t pos_dst = args->dst_pos; + size_t count = args->count; int status; - status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context, + status = nfs4_set_rw_stateid(&args->src_stateid, src_lock->open_context, src_lock, FMODE_READ); if (status) return status; @@ -161,7 +157,7 @@ static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src, if (status) return status; - status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context, + status = nfs4_set_rw_stateid(&args->dst_stateid, dst_lock->open_context, dst_lock, FMODE_WRITE); if (status) return status; @@ -171,22 +167,22 @@ static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src, return status; status = nfs4_call_sync(server->client, server, &msg, - &args.seq_args, &res.seq_res, 0); + &args->seq_args, &res->seq_res, 0); if (status == -ENOTSUPP) server->caps &= ~NFS_CAP_COPY; if (status) return status; - if (res.write_res.verifier.committed != NFS_FILE_SYNC) { - status = nfs_commit_file(dst, &res.write_res.verifier.verifier); + if (res->write_res.verifier.committed != NFS_FILE_SYNC) { + status = nfs_commit_file(dst, &res->write_res.verifier.verifier); if (status) return status; } truncate_pagecache_range(dst_inode, pos_dst, - pos_dst + res.write_res.count); + pos_dst + res->write_res.count); - return res.write_res.count; + return res->write_res.count; } ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, @@ -196,8 +192,22 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, struct nfs_server *server = NFS_SERVER(file_inode(dst)); struct nfs_lock_context *src_lock; struct nfs_lock_context *dst_lock; - struct nfs4_exception src_exception = { }; - struct nfs4_exception dst_exception = { }; + struct nfs42_copy_args args = { + .src_fh = NFS_FH(file_inode(src)), + .src_pos = pos_src, + .dst_fh = NFS_FH(file_inode(dst)), + .dst_pos = pos_dst, + .count = count, + }; + struct nfs42_copy_res res; + struct nfs4_exception src_exception = { + .inode = file_inode(src), + .stateid = &args.src_stateid, + }; + struct nfs4_exception dst_exception = { + .inode = file_inode(dst), + .stateid = &args.dst_stateid, + }; ssize_t err, err2; if (!nfs_server_capable(file_inode(dst), NFS_CAP_COPY)) @@ -207,7 +217,6 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, if (IS_ERR(src_lock)) return PTR_ERR(src_lock); - src_exception.inode = file_inode(src); src_exception.state = src_lock->open_context->state; dst_lock = nfs_get_lock_context(nfs_file_open_context(dst)); @@ -216,15 +225,17 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, goto out_put_src_lock; } - dst_exception.inode = file_inode(dst); dst_exception.state = dst_lock->open_context->state; do { inode_lock(file_inode(dst)); - err = _nfs42_proc_copy(src, pos_src, src_lock, - dst, pos_dst, dst_lock, count); + err = _nfs42_proc_copy(src, src_lock, + dst, dst_lock, + &args, &res); inode_unlock(file_inode(dst)); + if (err >= 0) + break; if (err == -ENOTSUPP) { err = -EOPNOTSUPP; break; From c65db336d6c6f82e30e0bdb771517297db8e880e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 17 Feb 2017 19:49:09 -0500 Subject: [PATCH 127/154] pNFS/flexfiles: If the layout is invalid, it must be updated before retrying commit df3ab232e462bce20710596d697ade6b72497694 upstream. If we see that our pNFS READ/WRITE/COMMIT operation failed, but we also see that our layout segment is no longer valid, then we need to get a new layout segment before retrying. Fixes: 90816d1ddacf ("NFSv4.1/flexfiles: Don't mark the entire deviceid...") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/flexfilelayout/flexfilelayout.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index a5c38889e7ae..13abd608af0f 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1073,9 +1073,6 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, struct nfs_client *mds_client = mds_server->nfs_client; struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table; - if (task->tk_status >= 0) - return 0; - switch (task->tk_status) { /* MDS state errors */ case -NFS4ERR_DELEG_REVOKED: @@ -1176,9 +1173,6 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task, { struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); - if (task->tk_status >= 0) - return 0; - switch (task->tk_status) { /* File access problems. Don't mark the device as unavailable */ case -EACCES: @@ -1213,6 +1207,13 @@ static int ff_layout_async_handle_error(struct rpc_task *task, { int vers = clp->cl_nfs_mod->rpc_vers->number; + if (task->tk_status >= 0) + return 0; + + /* Handle the case of an invalid layout segment */ + if (!pnfs_is_valid_lseg(lseg)) + return -NFS4ERR_RESET_TO_PNFS; + switch (vers) { case 3: return ff_layout_async_handle_error_v3(task, lseg, idx); From 3f22cc6f5ca335de95a49e9a217efaf1034e26ae Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 23 Feb 2017 14:53:39 -0500 Subject: [PATCH 128/154] NFSv4: fix getacl head length estimation commit 6682c14bbe505a8b912c57faf544f866777ee48d upstream. Bitmap and attrlen follow immediately after the op reply header. This was an oversight from commit bf118a342f. Consequences of this are just minor efficiency (extra calls to xdr_shrink_bufhead). Fixes: bf118a342f10 "NFSv4: include bitmap in nfsv4 get acl data" Reviewed-by: Kinglong Mee Signed-off-by: J. Bruce Fields Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index fc89e5ed07ee..c9c4d9855976 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2492,7 +2492,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); - replen = hdr.replen + op_decode_hdr_maxsz + 1; + replen = hdr.replen + op_decode_hdr_maxsz; encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr); xdr_inline_pages(&req->rq_rcv_buf, replen << 2, From d78f93384da1e21825144fdfac5dc0bc18db21bd Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 23 Feb 2017 14:54:21 -0500 Subject: [PATCH 129/154] NFSv4: fix getacl ERANGE for some ACL buffer sizes commit ed92d8c137b7794c2c2aa14479298b9885967607 upstream. We're not taking into account that the space needed for the (variable length) attr bitmap, with the result that we'd sometimes get a spurious ERANGE when the ACL data got close to the end of a page. Just add in an extra page to make sure. Signed-off-by: Weston Andros Adamson Signed-off-by: J. Bruce Fields Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 123105f4be53..609840de31d3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4990,7 +4990,7 @@ out: */ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) { - struct page *pages[NFS4ACL_MAXPAGES] = {NULL, }; + struct page *pages[NFS4ACL_MAXPAGES + 1] = {NULL, }; struct nfs_getaclargs args = { .fh = NFS_FH(inode), .acl_pages = pages, @@ -5004,13 +5004,9 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu .rpc_argp = &args, .rpc_resp = &res, }; - unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE); + unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1; int ret = -ENOMEM, i; - /* As long as we're doing a round trip to the server anyway, - * let's be prepared for a page of acl data. */ - if (npages == 0) - npages = 1; if (npages > ARRAY_SIZE(pages)) return -ERANGE; From ec160ad2acaad4a7de6eab7338ef5e3b2b19f907 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Mon, 19 Dec 2016 20:10:48 +0800 Subject: [PATCH 130/154] f2fs: fix a problem of using memory after free commit 7855eba4d6102f811b6dd142d6c749f53b591fa3 upstream. This patch fix a problem of using memory after free in function __try_merge_extent_node. Fixes: 0f825ee6e873 ("f2fs: add new interfaces for extent tree") Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/extent_cache.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 2b06d4fcd954..7b32ce979fe1 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -352,11 +352,12 @@ static struct extent_node *__try_merge_extent_node(struct inode *inode, } if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) { - if (en) - __release_extent_node(sbi, et, prev_ex); next_ex->ei.fofs = ei->fofs; next_ex->ei.blk = ei->blk; next_ex->ei.len += ei->len; + if (en) + __release_extent_node(sbi, et, prev_ex); + en = next_ex; } From d00d1b71d98468ad6fbee590705c27fc1241f96e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 14 Feb 2017 09:54:37 -0800 Subject: [PATCH 131/154] f2fs: fix multiple f2fs_add_link() calls having same name commit 88c5c13a5027b36d914536fdba23f069d7067204 upstream. It turns out a stakable filesystem like sdcardfs in AOSP can trigger multiple vfs_create() to lower filesystem. In that case, f2fs will add multiple dentries having same name which breaks filesystem consistency. Until upper layer fixes, let's work around by f2fs, which shows actually not much performance regression. Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/dir.c | 34 +++++++++++++++++++++++++++++----- fs/f2fs/f2fs.h | 1 + 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 369f4513be37..ebdc90fc71b7 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -207,9 +207,13 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, f2fs_put_page(dentry_page, 0); } - if (!de && room && F2FS_I(dir)->chash != namehash) { - F2FS_I(dir)->chash = namehash; - F2FS_I(dir)->clevel = level; + /* This is to increase the speed of f2fs_create */ + if (!de && room) { + F2FS_I(dir)->task = current; + if (F2FS_I(dir)->chash != namehash) { + F2FS_I(dir)->chash = namehash; + F2FS_I(dir)->clevel = level; + } } return de; @@ -643,14 +647,34 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *inode, nid_t ino, umode_t mode) { struct fscrypt_name fname; + struct page *page = NULL; + struct f2fs_dir_entry *de = NULL; int err; err = fscrypt_setup_filename(dir, name, 0, &fname); if (err) return err; - err = __f2fs_do_add_link(dir, &fname, inode, ino, mode); - + /* + * An immature stakable filesystem shows a race condition between lookup + * and create. If we have same task when doing lookup and create, it's + * definitely fine as expected by VFS normally. Otherwise, let's just + * verify on-disk dentry one more time, which guarantees filesystem + * consistency more. + */ + if (current != F2FS_I(dir)->task) { + de = __f2fs_find_entry(dir, &fname, &page); + F2FS_I(dir)->task = NULL; + } + if (de) { + f2fs_dentry_kunmap(dir, page); + f2fs_put_page(page, 0); + err = -EEXIST; + } else if (IS_ERR(page)) { + err = PTR_ERR(page); + } else { + err = __f2fs_do_add_link(dir, &fname, inode, ino, mode); + } fscrypt_free_filename(&fname); return err; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 506af456412f..ecc3fd0ca6bb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -431,6 +431,7 @@ struct f2fs_inode_info { atomic_t dirty_pages; /* # of dirty pages */ f2fs_hash_t chash; /* hash value of given file name */ unsigned int clevel; /* maximum level of given file name */ + struct task_struct *task; /* lookup and create consistency */ nid_t i_xattr_nid; /* node id that contains xattrs */ unsigned long long xattr_ver; /* cp version of xattr modification */ loff_t last_disk_size; /* lastly written file size */ From 4992ba2840bd40a85887a1a0e5d69f688f58f9dd Mon Sep 17 00:00:00 2001 From: Hou Pengyang Date: Thu, 16 Feb 2017 12:34:31 +0000 Subject: [PATCH 132/154] f2fs: add ovp valid_blocks check for bg gc victim to fg_gc commit e93b9865251a0503d83fd570e7d5a7c8bc351715 upstream. For foreground gc, greedy algorithm should be adapted, which makes this formula work well: (2 * (100 / config.overprovision + 1) + 6) But currently, we fg_gc have a prior to select bg_gc victim segments to gc first, these victims are selected by cost-benefit algorithm, we can't guarantee such segments have the small valid blocks, which may destroy the f2fs rule, on the worstest case, would consume all the free segments. This patch fix this by add a filter in check_bg_victims, if segment's has # of valid blocks over overprovision ratio, skip such segments. Signed-off-by: Hou Pengyang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/f2fs.h | 3 +++ fs/f2fs/gc.c | 22 ++++++++++++++++++++-- fs/f2fs/segment.h | 9 +++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ecc3fd0ca6bb..3a1640be7ffc 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -834,6 +834,9 @@ struct f2fs_sb_info { struct f2fs_gc_kthread *gc_thread; /* GC thread */ unsigned int cur_victim_sec; /* current victim section num */ + /* threshold for converting bg victims for fg */ + u64 fggc_threshold; + /* maximum # of trials to find a victim segment for SSR and GC */ unsigned int max_victim_search; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 6f14ee923acd..34a69e7ed90b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -166,7 +166,8 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, p->ofs_unit = sbi->segs_per_sec; } - if (p->max_search > sbi->max_victim_search) + /* we need to check every dirty segments in the FG_GC case */ + if (gc_type != FG_GC && p->max_search > sbi->max_victim_search) p->max_search = sbi->max_victim_search; p->offset = sbi->last_victim[p->gc_mode]; @@ -199,6 +200,10 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) { if (sec_usage_check(sbi, secno)) continue; + + if (no_fggc_candidate(sbi, secno)) + continue; + clear_bit(secno, dirty_i->victim_secmap); return secno * sbi->segs_per_sec; } @@ -322,13 +327,15 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, nsearched++; } - secno = GET_SECNO(sbi, segno); if (sec_usage_check(sbi, secno)) goto next; if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) goto next; + if (gc_type == FG_GC && p.alloc_mode == LFS && + no_fggc_candidate(sbi, secno)) + goto next; cost = get_gc_cost(sbi, segno, &p); @@ -972,5 +979,16 @@ stop: void build_gc_manager(struct f2fs_sb_info *sbi) { + u64 main_count, resv_count, ovp_count, blocks_per_sec; + DIRTY_I(sbi)->v_ops = &default_v_ops; + + /* threshold of # of valid blocks in a section for victims of FG_GC */ + main_count = SM_I(sbi)->main_segments << sbi->log_blocks_per_seg; + resv_count = SM_I(sbi)->reserved_segments << sbi->log_blocks_per_seg; + ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg; + blocks_per_sec = sbi->blocks_per_seg * sbi->segs_per_sec; + + sbi->fggc_threshold = div_u64((main_count - ovp_count) * blocks_per_sec, + (main_count - resv_count)); } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index fecb856ad874..b164f8339281 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -688,6 +688,15 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) - (base + 1) + type; } +static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi, + unsigned int secno) +{ + if (get_valid_blocks(sbi, secno, sbi->segs_per_sec) >= + sbi->fggc_threshold) + return true; + return false; +} + static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno) { if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno)) From 8c53efc399565ef4be8945c9bf45c94153699221 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 27 Feb 2017 11:57:11 -0800 Subject: [PATCH 133/154] f2fs: avoid to issue redundant discard commands commit 8b107f5b97772c7c0c218302e9a4d15b4edf50b4 upstream. If segs_per_sec is over 1 like under SMR, previously f2fs issues discard commands redundantly on the same section, since we didn't move end position for the previous discard command. E.g., start end | | prefree_bitmap = [01111100111100] And, after issue discard for this section, end start | | prefree_bitmap = [01111100111100] Select this section again by searching from (end + 1), start end | | prefree_bitmap = [01111100111100] Fixes: 36abef4e796d38 ("f2fs: introduce mode=lfs mount option") Cc: Damien Le Moal Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/segment.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fc886f008449..a7943f861d68 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -813,6 +813,8 @@ next: start = start_segno + sbi->segs_per_sec; if (start < end) goto next; + else + end = start - 1; } mutex_unlock(&dirty_i->seglist_lock); From 616f5ef61338a53e997eebbe2c13b21c40df9b86 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 23 Jan 2017 11:41:46 +0100 Subject: [PATCH 134/154] rtc: sun6i: Disable the build as a module commit 3753941475ae6501dcd1e41832bd0e6c35247d6a upstream. Since we have to provide the clock very early on, the RTC driver cannot be built as a module. Make sure that won't happen. Signed-off-by: Maxime Ripard Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/Kconfig | 2 +- drivers/rtc/rtc-sun6i.c | 7 +------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index e859d148aba9..0723c97ebea3 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -1432,7 +1432,7 @@ config RTC_DRV_SUN4V based RTC on SUN4V systems. config RTC_DRV_SUN6I - tristate "Allwinner A31 RTC" + bool "Allwinner A31 RTC" default MACH_SUN6I || MACH_SUN8I || COMPILE_TEST depends on ARCH_SUNXI help diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c index c169a2cd4727..a6136cb99851 100644 --- a/drivers/rtc/rtc-sun6i.c +++ b/drivers/rtc/rtc-sun6i.c @@ -439,9 +439,4 @@ static struct platform_driver sun6i_rtc_driver = { .of_match_table = sun6i_rtc_dt_ids, }, }; - -module_platform_driver(sun6i_rtc_driver); - -MODULE_DESCRIPTION("sun6i RTC driver"); -MODULE_AUTHOR("Chen-Yu Tsai "); -MODULE_LICENSE("GPL"); +builtin_platform_driver(sun6i_rtc_driver); From 6aae7ffa33072e6d3a208039ac015c1a0fb51e37 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 23 Jan 2017 11:41:47 +0100 Subject: [PATCH 135/154] rtc: sun6i: Add some locking commit a9422a19ce270a22fc520f2278fb7e80c58be508 upstream. Some registers have a read-modify-write access pattern that are not atomic. Add some locking to prevent from concurrent accesses. Acked-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-sun6i.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c index a6136cb99851..72086f0c0782 100644 --- a/drivers/rtc/rtc-sun6i.c +++ b/drivers/rtc/rtc-sun6i.c @@ -114,13 +114,17 @@ struct sun6i_rtc_dev { void __iomem *base; int irq; unsigned long alarm; + + spinlock_t lock; }; static irqreturn_t sun6i_rtc_alarmirq(int irq, void *id) { struct sun6i_rtc_dev *chip = (struct sun6i_rtc_dev *) id; + irqreturn_t ret = IRQ_NONE; u32 val; + spin_lock(&chip->lock); val = readl(chip->base + SUN6I_ALRM_IRQ_STA); if (val & SUN6I_ALRM_IRQ_STA_CNT_IRQ_PEND) { @@ -129,10 +133,11 @@ static irqreturn_t sun6i_rtc_alarmirq(int irq, void *id) rtc_update_irq(chip->rtc, 1, RTC_AF | RTC_IRQF); - return IRQ_HANDLED; + ret = IRQ_HANDLED; } + spin_unlock(&chip->lock); - return IRQ_NONE; + return ret; } static void sun6i_rtc_setaie(int to, struct sun6i_rtc_dev *chip) @@ -140,6 +145,7 @@ static void sun6i_rtc_setaie(int to, struct sun6i_rtc_dev *chip) u32 alrm_val = 0; u32 alrm_irq_val = 0; u32 alrm_wake_val = 0; + unsigned long flags; if (to) { alrm_val = SUN6I_ALRM_EN_CNT_EN; @@ -150,9 +156,11 @@ static void sun6i_rtc_setaie(int to, struct sun6i_rtc_dev *chip) chip->base + SUN6I_ALRM_IRQ_STA); } + spin_lock_irqsave(&chip->lock, flags); writel(alrm_val, chip->base + SUN6I_ALRM_EN); writel(alrm_irq_val, chip->base + SUN6I_ALRM_IRQ_EN); writel(alrm_wake_val, chip->base + SUN6I_ALARM_CONFIG); + spin_unlock_irqrestore(&chip->lock, flags); } static int sun6i_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm) @@ -191,11 +199,15 @@ static int sun6i_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm) static int sun6i_rtc_getalarm(struct device *dev, struct rtc_wkalrm *wkalrm) { struct sun6i_rtc_dev *chip = dev_get_drvdata(dev); + unsigned long flags; u32 alrm_st; u32 alrm_en; + spin_lock_irqsave(&chip->lock, flags); alrm_en = readl(chip->base + SUN6I_ALRM_IRQ_EN); alrm_st = readl(chip->base + SUN6I_ALRM_IRQ_STA); + spin_unlock_irqrestore(&chip->lock, flags); + wkalrm->enabled = !!(alrm_en & SUN6I_ALRM_EN_CNT_EN); wkalrm->pending = !!(alrm_st & SUN6I_ALRM_EN_CNT_EN); rtc_time_to_tm(chip->alarm, &wkalrm->time); @@ -356,6 +368,7 @@ static int sun6i_rtc_probe(struct platform_device *pdev) chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL); if (!chip) return -ENOMEM; + spin_lock_init(&chip->lock); platform_set_drvdata(pdev, chip); chip->dev = &pdev->dev; From de2aa5b3ee76e97dc3cea33cbd3d1bd443fda7a0 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 23 Jan 2017 11:41:48 +0100 Subject: [PATCH 136/154] rtc: sun6i: Switch to the external oscillator commit fb61bb82cb46a932ef2fc62e1c731c8e7e6640d5 upstream. The RTC is clocked from either an internal, imprecise, oscillator or an external one, which is usually much more accurate. The difference perceived between the time elapsed and the time reported by the RTC is in a 10% scale, which prevents the RTC from being useful at all. Fortunately, the external oscillator is reported to be mandatory in the Allwinner datasheet, so we can just switch to it. Fixes: 9765d2d94309 ("rtc: sun6i: Add sun6i RTC driver") Signed-off-by: Maxime Ripard Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-sun6i.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c index 72086f0c0782..b0d45d23a11b 100644 --- a/drivers/rtc/rtc-sun6i.c +++ b/drivers/rtc/rtc-sun6i.c @@ -37,9 +37,11 @@ /* Control register */ #define SUN6I_LOSC_CTRL 0x0000 +#define SUN6I_LOSC_CTRL_KEY (0x16aa << 16) #define SUN6I_LOSC_CTRL_ALM_DHMS_ACC BIT(9) #define SUN6I_LOSC_CTRL_RTC_HMS_ACC BIT(8) #define SUN6I_LOSC_CTRL_RTC_YMD_ACC BIT(7) +#define SUN6I_LOSC_CTRL_EXT_OSC BIT(0) #define SUN6I_LOSC_CTRL_ACC_MASK GENMASK(9, 7) /* RTC */ @@ -417,6 +419,10 @@ static int sun6i_rtc_probe(struct platform_device *pdev) /* disable alarm wakeup */ writel(0, chip->base + SUN6I_ALARM_CONFIG); + /* switch to the external, more precise, oscillator */ + writel(SUN6I_LOSC_CTRL_KEY | SUN6I_LOSC_CTRL_EXT_OSC, + chip->base + SUN6I_LOSC_CTRL); + chip->rtc = rtc_device_register("rtc-sun6i", &pdev->dev, &sun6i_rtc_ops, THIS_MODULE); if (IS_ERR(chip->rtc)) { From 6f9c02ab9d0d5aefa15953ad3d55912e3a7abb57 Mon Sep 17 00:00:00 2001 From: "colyli@suse.de" Date: Sat, 28 Jan 2017 21:11:49 +0800 Subject: [PATCH 137/154] md linear: fix a race between linear_add() and linear_congested() commit 03a9e24ef2aaa5f1f9837356aed79c860521407a upstream. Recently I receive a bug report that on Linux v3.0 based kerenl, hot add disk to a md linear device causes kernel crash at linear_congested(). From the crash image analysis, I find in linear_congested(), mddev->raid_disks contains value N, but conf->disks[] only has N-1 pointers available. Then a NULL pointer deference crashes the kernel. There is a race between linear_add() and linear_congested(), RCU stuffs used in these two functions cannot avoid the race. Since Linuv v4.0 RCU code is replaced by introducing mddev_suspend(). After checking the upstream code, it seems linear_congested() is not called in generic_make_request() code patch, so mddev_suspend() cannot provent it from being called. The possible race still exists. Here I explain how the race still exists in current code. For a machine has many CPUs, on one CPU, linear_add() is called to add a hard disk to a md linear device; at the same time on other CPU, linear_congested() is called to detect whether this md linear device is congested before issuing an I/O request onto it. Now I use a possible code execution time sequence to demo how the possible race happens, seq linear_add() linear_congested() 0 conf=mddev->private 1 oldconf=mddev->private 2 mddev->raid_disks++ 3 for (i=0; iraid_disks;i++) 4 bdev_get_queue(conf->disks[i].rdev->bdev) 5 mddev->private=newconf In linear_add() mddev->raid_disks is increased in time seq 2, and on another CPU in linear_congested() the for-loop iterates conf->disks[i] by the increased mddev->raid_disks in time seq 3,4. But conf with one more element (which is a pointer to struct dev_info type) to conf->disks[] is not updated yet, accessing its structure member in time seq 4 will cause a NULL pointer deference fault. To fix this race, there are 2 parts of modification in the patch, 1) Add 'int raid_disks' in struct linear_conf, as a copy of mddev->raid_disks. It is initialized in linear_conf(), always being consistent with pointers number of 'struct dev_info disks[]'. When iterating conf->disks[] in linear_congested(), use conf->raid_disks to replace mddev->raid_disks in the for-loop, then NULL pointer deference will not happen again. 2) RCU stuffs are back again, and use kfree_rcu() in linear_add() to free oldconf memory. Because oldconf may be referenced as mddev->private in linear_congested(), kfree_rcu() makes sure that its memory will not be released until no one uses it any more. Also some code comments are added in this patch, to make this modification to be easier understandable. This patch can be applied for kernels since v4.0 after commit: 3be260cc18f8 ("md/linear: remove rcu protections in favour of suspend/resume"). But this bug is reported on Linux v3.0 based kernel, for people who maintain kernels before Linux v4.0, they need to do some back back port to this patch. Changelog: - V3: add 'int raid_disks' in struct linear_conf, and use kfree_rcu() to replace rcu_call() in linear_add(). - v2: add RCU stuffs by suggestion from Shaohua and Neil. - v1: initial effort. Signed-off-by: Coly Li Cc: Shaohua Li Cc: Neil Brown Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/linear.c | 39 ++++++++++++++++++++++++++++++++++----- drivers/md/linear.h | 1 + 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 86f5d435901d..b0c0aef92a37 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -52,18 +52,26 @@ static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector) return conf->disks + lo; } +/* + * In linear_congested() conf->raid_disks is used as a copy of + * mddev->raid_disks to iterate conf->disks[], because conf->raid_disks + * and conf->disks[] are created in linear_conf(), they are always + * consitent with each other, but mddev->raid_disks does not. + */ static int linear_congested(struct mddev *mddev, int bits) { struct linear_conf *conf; int i, ret = 0; - conf = mddev->private; + rcu_read_lock(); + conf = rcu_dereference(mddev->private); - for (i = 0; i < mddev->raid_disks && !ret ; i++) { + for (i = 0; i < conf->raid_disks && !ret ; i++) { struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev); ret |= bdi_congested(&q->backing_dev_info, bits); } + rcu_read_unlock(); return ret; } @@ -143,6 +151,19 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) conf->disks[i-1].end_sector + conf->disks[i].rdev->sectors; + /* + * conf->raid_disks is copy of mddev->raid_disks. The reason to + * keep a copy of mddev->raid_disks in struct linear_conf is, + * mddev->raid_disks may not be consistent with pointers number of + * conf->disks[] when it is updated in linear_add() and used to + * iterate old conf->disks[] earray in linear_congested(). + * Here conf->raid_disks is always consitent with number of + * pointers in conf->disks[] array, and mddev->private is updated + * with rcu_assign_pointer() in linear_addr(), such race can be + * avoided. + */ + conf->raid_disks = raid_disks; + return conf; out: @@ -195,15 +216,23 @@ static int linear_add(struct mddev *mddev, struct md_rdev *rdev) if (!newconf) return -ENOMEM; + /* newconf->raid_disks already keeps a copy of * the increased + * value of mddev->raid_disks, WARN_ONCE() is just used to make + * sure of this. It is possible that oldconf is still referenced + * in linear_congested(), therefore kfree_rcu() is used to free + * oldconf until no one uses it anymore. + */ mddev_suspend(mddev); - oldconf = mddev->private; + oldconf = rcu_dereference(mddev->private); mddev->raid_disks++; - mddev->private = newconf; + WARN_ONCE(mddev->raid_disks != newconf->raid_disks, + "copied raid_disks doesn't match mddev->raid_disks"); + rcu_assign_pointer(mddev->private, newconf); md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); set_capacity(mddev->gendisk, mddev->array_sectors); mddev_resume(mddev); revalidate_disk(mddev->gendisk); - kfree(oldconf); + kfree_rcu(oldconf, rcu); return 0; } diff --git a/drivers/md/linear.h b/drivers/md/linear.h index b685ddd7d7f7..8d392e6098b3 100644 --- a/drivers/md/linear.h +++ b/drivers/md/linear.h @@ -10,6 +10,7 @@ struct linear_conf { struct rcu_head rcu; sector_t array_sectors; + int raid_disks; /* a copy of mddev->raid_disks */ struct dev_info disks[0]; }; #endif From 178d07a0b8c4dc1f6e8a924f04231a5cf9cf86d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sat, 28 Jan 2017 14:31:22 +0100 Subject: [PATCH 138/154] bcma: use (get|put)_device when probing/removing device driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a971df0b9d04674e325346c17de9a895425ca5e1 upstream. This allows tracking device state and e.g. makes devm work as expected. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/bcma/main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index 2c1798e38abd..38688236b3cd 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -633,8 +633,11 @@ static int bcma_device_probe(struct device *dev) drv); int err = 0; + get_device(dev); if (adrv->probe) err = adrv->probe(core); + if (err) + put_device(dev); return err; } @@ -647,6 +650,7 @@ static int bcma_device_remove(struct device *dev) if (adrv->remove) adrv->remove(core); + put_device(dev); return 0; } From 9d82393e658cf7010b90a79d7c065e1766e5480c Mon Sep 17 00:00:00 2001 From: Mark Marshall Date: Thu, 26 Jan 2017 16:18:27 +0100 Subject: [PATCH 139/154] mtd: nand: ifc: Fix location of eccstat registers for IFC V1.0 commit 656441478ed55d960df5f3ccdf5a0f8c61dfd0b3 upstream. The commit 7a654172161c ("mtd/ifc: Add support for IFC controller version 2.0") added support for version 2.0 of the IFC controller. The version 2.0 controller has the ECC status registers at a different location to the previous versions. Correct the fsl_ifc_nand structure so that the ECC status can be read from the correct location for both version 1.0 and 2.0 of the controller. Fixes: 7a654172161c ("mtd/ifc: Add support for IFC controller version 2.0") Signed-off-by: Mark Marshall Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/fsl_ifc_nand.c | 8 +++++++- include/linux/fsl_ifc.h | 8 ++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c index 0a177b1bfe3e..d1570f512f0b 100644 --- a/drivers/mtd/nand/fsl_ifc_nand.c +++ b/drivers/mtd/nand/fsl_ifc_nand.c @@ -258,9 +258,15 @@ static void fsl_ifc_run_command(struct mtd_info *mtd) int bufnum = nctrl->page & priv->bufnum_mask; int sector = bufnum * chip->ecc.steps; int sector_end = sector + chip->ecc.steps - 1; + __be32 *eccstat_regs; + + if (ctrl->version >= FSL_IFC_VERSION_2_0_0) + eccstat_regs = ifc->ifc_nand.v2_nand_eccstat; + else + eccstat_regs = ifc->ifc_nand.v1_nand_eccstat; for (i = sector / 4; i <= sector_end / 4; i++) - eccstat[i] = ifc_in32(&ifc->ifc_nand.nand_eccstat[i]); + eccstat[i] = ifc_in32(&eccstat_regs[i]); for (i = sector; i <= sector_end; i++) { errors = check_read_ecc(mtd, ctrl, eccstat, i); diff --git a/include/linux/fsl_ifc.h b/include/linux/fsl_ifc.h index 3f9778cbc79d..c332f0a45607 100644 --- a/include/linux/fsl_ifc.h +++ b/include/linux/fsl_ifc.h @@ -733,8 +733,12 @@ struct fsl_ifc_nand { __be32 nand_erattr1; u32 res19[0x10]; __be32 nand_fsr; - u32 res20[0x3]; - __be32 nand_eccstat[6]; + u32 res20; + /* The V1 nand_eccstat is actually 4 words that overlaps the + * V2 nand_eccstat. + */ + __be32 v1_nand_eccstat[2]; + __be32 v2_nand_eccstat[6]; u32 res21[0x1c]; __be32 nanndcr; u32 res22[0x2]; From 788d81d4e5d43ffc2dbcd4513dfec2808f5ff616 Mon Sep 17 00:00:00 2001 From: Magnus Lilja Date: Wed, 21 Dec 2016 22:13:58 +0100 Subject: [PATCH 140/154] dmaengine: ipu: Make sure the interrupt routine checks all interrupts. commit adee40b265d7568296e218f079f478197ffa15bf upstream. Commit 3d8cc00073d6 ("dmaengine: ipu: Consolidate duplicated irq handlers") consolidated the two interrupts routines into one, but the remaining interrupt routine only checks the status of the error interrupts, not the normal interrupts. This patch fixes that problem (tested on i.MX31 PDK board). Fixes: 3d8cc00073d6 ("dmaengine: ipu: Consolidate duplicated irq handlers") Cc: Vinod Koul Signed-off-by: Magnus Lilja Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/ipu/ipu_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/ipu/ipu_irq.c b/drivers/dma/ipu/ipu_irq.c index dd184b50e5b4..284627806b88 100644 --- a/drivers/dma/ipu/ipu_irq.c +++ b/drivers/dma/ipu/ipu_irq.c @@ -272,7 +272,7 @@ static void ipu_irq_handler(struct irq_desc *desc) u32 status; int i, line; - for (i = IPU_IRQ_NR_FN_BANKS; i < IPU_IRQ_NR_BANKS; i++) { + for (i = 0; i < IPU_IRQ_NR_BANKS; i++) { struct ipu_irq_bank *bank = irq_bank + i; raw_spin_lock(&bank_lock); From ec3bc2c5ed576c27e4c89a4fc1654755a0fe71bb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Feb 2017 16:59:46 -0500 Subject: [PATCH 141/154] xprtrdma: Fix Read chunk padding commit 24abdf1be15c478e2821d6fc903a4a4440beff02 upstream. When pad optimization is disabled, rpcrdma_convert_iovs still does not add explicit XDR round-up padding to a Read chunk. Commit 677eb17e94ed ("xprtrdma: Fix XDR tail buffer marshalling") incorrectly short-circuited the test for whether round-up padding is needed that appears later in rpcrdma_convert_iovs. However, if this is indeed a regular Read chunk (and not a Position-Zero Read chunk), the tail iovec _always_ contains the chunk's padding, and never anything else. So, it's easy to just skip the tail when padding optimization is enabled, and add the tail in a subsequent Read chunk segment, if disabled. Fixes: 677eb17e94ed ("xprtrdma: Fix XDR tail buffer marshalling") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtrdma/rpc_rdma.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index d987c2d3dd6e..e5a8c22173c2 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -226,8 +226,10 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, if (len && n == RPCRDMA_MAX_SEGS) goto out_overflow; - /* When encoding the read list, the tail is always sent inline */ - if (type == rpcrdma_readch) + /* When encoding a Read chunk, the tail iovec contains an + * XDR pad and may be omitted. + */ + if (type == rpcrdma_readch && xprt_rdma_pad_optimize) return n; /* When encoding the Write list, some servers need to see an extra @@ -238,10 +240,6 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, return n; if (xdrbuf->tail[0].iov_len) { - /* the rpcrdma protocol allows us to omit any trailing - * xdr pad bytes, saving the server an RDMA operation. */ - if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize) - return n; n = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, n); if (n == RPCRDMA_MAX_SEGS) goto out_overflow; From fab6c2caa48f892c4f3446aea9e253ca8a6187a6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Feb 2017 16:59:54 -0500 Subject: [PATCH 142/154] xprtrdma: Per-connection pad optimization commit b5f0afbea4f2ea52c613ac2b06cb6de2ea18cb6d upstream. Pad optimization is changed by echoing into /proc/sys/sunrpc/rdma_pad_optimize. This is a global setting, affecting all RPC-over-RDMA connections to all servers. The marshaling code picks up that value and uses it for decisions about how to construct each RPC-over-RDMA frame. Having it change suddenly in mid-operation can result in unexpected failures. And some servers a client mounts might need chunk round-up, while others don't. So instead, copy the pad_optimize setting into each connection's rpcrdma_ia when the transport is created, and use the copy, which can't change during the life of the connection, instead. This also removes a hack: rpcrdma_convert_iovs was using the remote-invalidation-expected flag to predict when it could leave out Write chunk padding. This is because the Linux server handles implicit XDR padding on Write chunks correctly, and only Linux servers can set the connection's remote-invalidation-expected flag. It's more sensible to use the pad optimization setting instead. Fixes: 677eb17e94ed ("xprtrdma: Fix XDR tail buffer marshalling") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtrdma/rpc_rdma.c | 28 ++++++++++++++-------------- net/sunrpc/xprtrdma/verbs.c | 1 + net/sunrpc/xprtrdma/xprt_rdma.h | 1 + 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index e5a8c22173c2..b8f46c186f02 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -186,9 +186,9 @@ rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, int n) */ static int -rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, - enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, - bool reminv_expected) +rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf, + unsigned int pos, enum rpcrdma_chunktype type, + struct rpcrdma_mr_seg *seg) { int len, n, p, page_base; struct page **ppages; @@ -229,14 +229,15 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, /* When encoding a Read chunk, the tail iovec contains an * XDR pad and may be omitted. */ - if (type == rpcrdma_readch && xprt_rdma_pad_optimize) + if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup) return n; - /* When encoding the Write list, some servers need to see an extra - * segment for odd-length Write chunks. The upper layer provides - * space in the tail iovec for this purpose. + /* When encoding a Write chunk, some servers need to see an + * extra segment for non-XDR-aligned Write chunks. The upper + * layer provides space in the tail iovec that may be used + * for this purpose. */ - if (type == rpcrdma_writech && reminv_expected) + if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup) return n; if (xdrbuf->tail[0].iov_len) { @@ -291,7 +292,8 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, if (rtype == rpcrdma_areadch) pos = 0; seg = req->rl_segments; - nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, false); + nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos, + rtype, seg); if (nsegs < 0) return ERR_PTR(nsegs); @@ -353,10 +355,9 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, } seg = req->rl_segments; - nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, + nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, rqst->rq_rcv_buf.head[0].iov_len, - wtype, seg, - r_xprt->rx_ia.ri_reminv_expected); + wtype, seg); if (nsegs < 0) return ERR_PTR(nsegs); @@ -421,8 +422,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, } seg = req->rl_segments; - nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg, - r_xprt->rx_ia.ri_reminv_expected); + nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg); if (nsegs < 0) return ERR_PTR(nsegs); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 8da7f6a4dfc3..39f9e31008bc 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -208,6 +208,7 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, /* Default settings for RPC-over-RDMA Version One */ r_xprt->rx_ia.ri_reminv_expected = false; + r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize; rsize = RPCRDMA_V1_DEF_INLINE_SIZE; wsize = RPCRDMA_V1_DEF_INLINE_SIZE; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index f6ae1b22da47..07db2d1e072d 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -75,6 +75,7 @@ struct rpcrdma_ia { unsigned int ri_max_inline_write; unsigned int ri_max_inline_read; bool ri_reminv_expected; + bool ri_implicit_roundup; struct ib_qp_attr ri_qp_attr; struct ib_qp_init_attr ri_qp_init_attr; }; From 73eea1c4000fd73c138dbf8826bc6e1fa901ae9b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Feb 2017 17:00:02 -0500 Subject: [PATCH 143/154] xprtrdma: Disable pad optimization by default commit c95a3c6b88658bcb8f77f85f31a0b9d9036e8016 upstream. Commit d5440e27d3e5 ("xprtrdma: Enable pad optimization") made the Linux client omit XDR round-up padding in normal Read and Write chunks so that the client doesn't have to register and invalidate 3-byte memory regions that contain no real data. Unfortunately, my cheery 2014 assessment that this optimization "is supported now by both Linux and Solaris servers" was premature. We've found bugs in Solaris in this area since commit d5440e27d3e5 ("xprtrdma: Enable pad optimization") was merged (SYMLINK is the main offender). So for maximum interoperability, I'm disabling this optimization again. If a CM private message is exchanged when connecting, the client recognizes that the server is Linux, and enables the optimization for that connection. Until now the Solaris server bugs did not impact common operations, and were thus largely benign. Soon, less capable devices on Linux NFS/RDMA clients will make use of Read chunks more often, and these Solaris bugs will prevent interoperation in more cases. Fixes: 677eb17e94ed ("xprtrdma: Fix XDR tail buffer marshalling") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtrdma/transport.c | 2 +- net/sunrpc/xprtrdma/verbs.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index ed5e285fd2ea..fa324fe73946 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -67,7 +67,7 @@ unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_inline_write_padding; static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; - int xprt_rdma_pad_optimize = 1; + int xprt_rdma_pad_optimize = 0; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 39f9e31008bc..4d5d5b1c98c4 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -216,6 +216,7 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, pmsg->cp_magic == rpcrdma_cmp_magic && pmsg->cp_version == RPCRDMA_CMP_VERSION) { r_xprt->rx_ia.ri_reminv_expected = true; + r_xprt->rx_ia.ri_implicit_roundup = true; rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); } From 86840a6305149ce9b466f3a841cffdb3c0fd2608 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Feb 2017 17:00:10 -0500 Subject: [PATCH 144/154] xprtrdma: Reduce required number of send SGEs commit 16f906d66cd76fb9895cbc628f447532a7ac1faa upstream. The MAX_SEND_SGES check introduced in commit 655fec6987be ("xprtrdma: Use gathered Send for large inline messages") fails for devices that have a small max_sge. Instead of checking for a large fixed maximum number of SGEs, check for a minimum small number. RPC-over-RDMA will switch to using a Read chunk if an xdr_buf has more pages than can fit in the device's max_sge limit. This is considerably better than failing all together to mount the server. This fix supports devices that have as few as three send SGEs available. Reported-by: Selvin Xavier Reported-by: Devesh Sharma Reported-by: Honggang Li Reported-by: Ram Amrani Fixes: 655fec6987be ("xprtrdma: Use gathered Send for large ...") Tested-by: Honggang Li Tested-by: Ram Amrani Tested-by: Steve Wise Reviewed-by: Parav Pandit Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtrdma/rpc_rdma.c | 26 +++++++++++++++++++++++--- net/sunrpc/xprtrdma/verbs.c | 13 +++++++------ net/sunrpc/xprtrdma/xprt_rdma.h | 2 ++ 3 files changed, 32 insertions(+), 9 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index b8f46c186f02..f57c9f0ab8f9 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -125,14 +125,34 @@ void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt) /* The client can send a request inline as long as the RPCRDMA header * plus the RPC call fit under the transport's inline limit. If the * combined call message size exceeds that limit, the client must use - * the read chunk list for this operation. + * a Read chunk for this operation. + * + * A Read chunk is also required if sending the RPC call inline would + * exceed this device's max_sge limit. */ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) { - struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct xdr_buf *xdr = &rqst->rq_snd_buf; + unsigned int count, remaining, offset; - return rqst->rq_snd_buf.len <= ia->ri_max_inline_write; + if (xdr->len > r_xprt->rx_ia.ri_max_inline_write) + return false; + + if (xdr->page_len) { + remaining = xdr->page_len; + offset = xdr->page_base & ~PAGE_MASK; + count = 0; + while (remaining) { + remaining -= min_t(unsigned int, + PAGE_SIZE - offset, remaining); + offset = 0; + if (++count > r_xprt->rx_ia.ri_max_send_sges) + return false; + } + } + + return true; } /* The client can't know how large the actual reply will be. Thus it diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 4d5d5b1c98c4..e2c37061edbe 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -479,18 +479,19 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia) */ int rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, - struct rpcrdma_create_data_internal *cdata) + struct rpcrdma_create_data_internal *cdata) { struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private; + unsigned int max_qp_wr, max_sge; struct ib_cq *sendcq, *recvcq; - unsigned int max_qp_wr; int rc; - if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_SEND_SGES) { - dprintk("RPC: %s: insufficient sge's available\n", - __func__); + max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES); + if (max_sge < RPCRDMA_MIN_SEND_SGES) { + pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge); return -ENOMEM; } + ia->ri_max_send_sges = max_sge - RPCRDMA_MIN_SEND_SGES; if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) { dprintk("RPC: %s: insufficient wqe's available\n", @@ -515,7 +516,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_attr.cap.max_recv_wr = cdata->max_requests; ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */ - ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_SEND_SGES; + ep->rep_attr.cap.max_send_sge = max_sge; ep->rep_attr.cap.max_recv_sge = 1; ep->rep_attr.cap.max_inline_data = 0; ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 07db2d1e072d..48989d5b2883 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -74,6 +74,7 @@ struct rpcrdma_ia { unsigned int ri_max_frmr_depth; unsigned int ri_max_inline_write; unsigned int ri_max_inline_read; + unsigned int ri_max_send_sges; bool ri_reminv_expected; bool ri_implicit_roundup; struct ib_qp_attr ri_qp_attr; @@ -310,6 +311,7 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ * - xdr_buf tail iovec */ enum { + RPCRDMA_MIN_SEND_SGES = 3, RPCRDMA_MAX_SEND_PAGES = PAGE_SIZE + RPCRDMA_MAX_INLINE - 1, RPCRDMA_MAX_PAGE_SGES = (RPCRDMA_MAX_SEND_PAGES >> PAGE_SHIFT) + 1, RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1, From fccb22e7d5df9b899228eb651747265202e5a321 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 22 Nov 2016 14:55:59 +0530 Subject: [PATCH 145/154] powerpc/xmon: Fix data-breakpoint commit c21a493a2b44650707d06741601894329486f2ad upstream. Currently xmon data-breakpoint feature is broken. Whenever there is a watchpoint match occurs, hw_breakpoint_handler will be called by do_break via notifier chains mechanism. If watchpoint is registered by xmon, hw_breakpoint_handler won't find any associated perf_event and returns immediately with NOTIFY_STOP. Similarly, do_break also returns without notifying to xmon. Solve this by returning NOTIFY_DONE when hw_breakpoint_handler does not find any perf_event associated with matched watchpoint, rather than NOTIFY_STOP, which tells the core code to continue calling the other breakpoint handlers including the xmon one. Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/hw_breakpoint.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 03d089b3ed72..469d86d1c2a5 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -228,8 +228,10 @@ int hw_breakpoint_handler(struct die_args *args) rcu_read_lock(); bp = __this_cpu_read(bp_per_reg); - if (!bp) + if (!bp) { + rc = NOTIFY_DONE; goto out; + } info = counter_arch_bp(bp); /* From 3552f917154a3472d3095988f98289ebdda5a8d3 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 7 Feb 2017 00:09:27 +0530 Subject: [PATCH 146/154] powerpc/mm: Add MMU_FTR_KERNEL_RO to possible feature mask commit a5ecdad4847897007399d7a14c9109b65ce4c9b7 upstream. Without this we will always find the feature disabled. Fixes: 984d7a1ec6 ("powerpc/mm: Fixup kernel read only mapping") Signed-off-by: Aneesh Kumar K.V Acked-by: Balbir Singh Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/mmu.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index a244e09d2d88..5d22b0bef3d8 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -136,6 +136,7 @@ enum { MMU_FTR_NO_SLBIE_B | MMU_FTR_16M_PAGE | MMU_FTR_TLBIEL | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_CI_LARGE_PAGE | MMU_FTR_1T_SEGMENT | MMU_FTR_TLBIE_CROP_VA | + MMU_FTR_KERNEL_RO | #ifdef CONFIG_PPC_RADIX_MMU MMU_FTR_TYPE_RADIX | #endif From 075be78c83989ba4e51ab05121bcb8f8b9c6ef6e Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 22 Feb 2017 10:42:02 +0530 Subject: [PATCH 147/154] powerpc/mm/hash: Always clear UPRT and Host Radix bits when setting up CPU commit fda2d27db6eae5c2468f9e4657539b72bbc238bb upstream. We will set LPCR with correct value for radix during int. This make sure we start with a sanitized value of LPCR. In case of kexec, cpus can have LPCR value based on the previous translation mode we were running. Fixes: fe036a0605d60 ("powerpc/64/kexec: Fix MMU cleanup on radix") Acked-by: Michael Neuling Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/cpu_setup_power.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 37c027ca83b2..7803756998e2 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -100,6 +100,8 @@ _GLOBAL(__setup_cpu_power9) mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE) or r3, r3, r4 + LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) + andc r3, r3, r4 bl __init_LPCR bl __init_HFSCR bl __init_tlb_power9 @@ -120,6 +122,8 @@ _GLOBAL(__restore_cpu_power9) mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE) or r3, r3, r4 + LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) + andc r3, r3, r4 bl __init_LPCR bl __init_HFSCR bl __init_tlb_power9 From 8a2307c7c0189a63810c23260be1f76346ad949a Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 15 Dec 2016 12:27:21 +0100 Subject: [PATCH 148/154] MIPS: IP22: Reformat inline assembler code to modern standards. commit f9f1c8db1c37253805eaa32265e1e1af3ae7d0a4 upstream. Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/mm/sc-ip22.c | 43 ++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/arch/mips/mm/sc-ip22.c b/arch/mips/mm/sc-ip22.c index 026cb59a914d..610f44b4d764 100644 --- a/arch/mips/mm/sc-ip22.c +++ b/arch/mips/mm/sc-ip22.c @@ -31,26 +31,29 @@ static inline void indy_sc_wipe(unsigned long first, unsigned long last) unsigned long tmp; __asm__ __volatile__( - ".set\tpush\t\t\t# indy_sc_wipe\n\t" - ".set\tnoreorder\n\t" - ".set\tmips3\n\t" - ".set\tnoat\n\t" - "mfc0\t%2, $12\n\t" - "li\t$1, 0x80\t\t\t# Go 64 bit\n\t" - "mtc0\t$1, $12\n\t" - - "dli\t$1, 0x9000000080000000\n\t" - "or\t%0, $1\t\t\t# first line to flush\n\t" - "or\t%1, $1\t\t\t# last line to flush\n\t" - ".set\tat\n\t" - - "1:\tsw\t$0, 0(%0)\n\t" - "bne\t%0, %1, 1b\n\t" - " daddu\t%0, 32\n\t" - - "mtc0\t%2, $12\t\t\t# Back to 32 bit\n\t" - "nop; nop; nop; nop;\n\t" - ".set\tpop" + " .set push # indy_sc_wipe \n" + " .set noreorder \n" + " .set mips3 \n" + " .set noat \n" + " mfc0 %2, $12 \n" + " li $1, 0x80 # Go 64 bit \n" + " mtc0 $1, $12 \n" + " \n" + " dli $1, 0x9000000080000000 \n" + " or %0, $1 # first line to flush \n" + " or %1, $1 # last line to flush \n" + " .set at \n" + " \n" + "1: sw $0, 0(%0) \n" + " bne %0, %1, 1b \n" + " daddu %0, 32 \n" + " \n" + " mtc0 %2, $12 # Back to 32 bit \n" + " nop # pipeline hazard \n" + " nop \n" + " nop \n" + " nop \n" + " .set pop \n" : "=r" (first), "=r" (last), "=&r" (tmp) : "0" (first), "1" (last)); } From aae02d1aafe7c1989a3f10ff1f6ca5eed855426e Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 15 Dec 2016 12:39:22 +0100 Subject: [PATCH 149/154] MIPS: IP22: Fix build error due to binutils 2.25 uselessnes. commit ae2f5e5ed04a17c1aa1f0a3714c725e12c21d2a9 upstream. Fix the following build error with binutils 2.25. CC arch/mips/mm/sc-ip22.o {standard input}: Assembler messages: {standard input}:132: Error: number (0x9000000080000000) larger than 32 bits {standard input}:159: Error: number (0x9000000080000000) larger than 32 bits {standard input}:200: Error: number (0x9000000080000000) larger than 32 bits scripts/Makefile.build:293: recipe for target 'arch/mips/mm/sc-ip22.o' failed make[1]: *** [arch/mips/mm/sc-ip22.o] Error 1 MIPS has used .set mips3 to temporarily switch the assembler to 64 bit mode in 64 bit kernels virtually forever. Binutils 2.25 broke this behavious partially by happily accepting 64 bit instructions in .set mips3 mode but puking on 64 bit constants when generating 32 bit ELF. Binutils 2.26 restored the old behaviour again. Fix build with binutils 2.25 by open coding the offending dli $1, 0x9000000080000000 as li $1, 0x9000 dsll $1, $1, 48 which is ugly be the only thing that will build on all binutils vintages. Signed-off-by: Ralf Baechle Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/mips/mm/sc-ip22.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/mips/mm/sc-ip22.c b/arch/mips/mm/sc-ip22.c index 610f44b4d764..f293a97cb885 100644 --- a/arch/mips/mm/sc-ip22.c +++ b/arch/mips/mm/sc-ip22.c @@ -39,7 +39,18 @@ static inline void indy_sc_wipe(unsigned long first, unsigned long last) " li $1, 0x80 # Go 64 bit \n" " mtc0 $1, $12 \n" " \n" - " dli $1, 0x9000000080000000 \n" + " # \n" + " # Open code a dli $1, 0x9000000080000000 \n" + " # \n" + " # Required because binutils 2.25 will happily accept \n" + " # 64 bit instructions in .set mips3 mode but puke on \n" + " # 64 bit constants when generating 32 bit ELF \n" + " # \n" + " lui $1,0x9000 \n" + " dsll $1,$1,0x10 \n" + " ori $1,$1,0x8000 \n" + " dsll $1,$1,0x10 \n" + " \n" " or %0, $1 # first line to flush \n" " or %1, $1 # last line to flush \n" " .set at \n" From 27ab5414b980b19ba127cb518546d628f40bd3dc Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:25 -0800 Subject: [PATCH 150/154] scsi: lpfc: Correct WQ creation for pagesize commit 8ea73db486cda442f0671f4bc9c03a76be398a28 upstream. Correct WQ creation for pagesize The driver was calculating the adapter command pagesize indicator from the system pagesize. However, the buffers the driver allocates are only one size (SLI4_PAGE_SIZE), so no calculation was necessary. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Cc: Mauricio Faria de Oliveira Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_hw4.h | 2 ++ drivers/scsi/lpfc/lpfc_sli.c | 9 +++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index ee8022737591..55faa94637a9 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -1185,6 +1185,7 @@ struct lpfc_mbx_wq_create { #define lpfc_mbx_wq_create_page_size_SHIFT 0 #define lpfc_mbx_wq_create_page_size_MASK 0x000000FF #define lpfc_mbx_wq_create_page_size_WORD word1 +#define LPFC_WQ_PAGE_SIZE_4096 0x1 #define lpfc_mbx_wq_create_wqe_size_SHIFT 8 #define lpfc_mbx_wq_create_wqe_size_MASK 0x0000000F #define lpfc_mbx_wq_create_wqe_size_WORD word1 @@ -1256,6 +1257,7 @@ struct rq_context { #define lpfc_rq_context_page_size_SHIFT 0 /* Version 1 Only */ #define lpfc_rq_context_page_size_MASK 0x000000FF #define lpfc_rq_context_page_size_WORD word0 +#define LPFC_RQ_PAGE_SIZE_4096 0x1 uint32_t reserved1; uint32_t word2; #define lpfc_rq_context_cq_id_SHIFT 16 diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index f4f77c5b0c83..49b4c798de18 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -13678,7 +13678,7 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, LPFC_WQ_WQE_SIZE_128); bf_set(lpfc_mbx_wq_create_page_size, &wq_create->u.request_1, - (PAGE_SIZE/SLI4_PAGE_SIZE)); + LPFC_WQ_PAGE_SIZE_4096); page = wq_create->u.request_1.page; break; } @@ -13704,8 +13704,9 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, LPFC_WQ_WQE_SIZE_128); break; } - bf_set(lpfc_mbx_wq_create_page_size, &wq_create->u.request_1, - (PAGE_SIZE/SLI4_PAGE_SIZE)); + bf_set(lpfc_mbx_wq_create_page_size, + &wq_create->u.request_1, + LPFC_WQ_PAGE_SIZE_4096); page = wq_create->u.request_1.page; break; default: @@ -13891,7 +13892,7 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, LPFC_RQE_SIZE_8); bf_set(lpfc_rq_context_page_size, &rq_create->u.request.context, - (PAGE_SIZE/SLI4_PAGE_SIZE)); + LPFC_RQ_PAGE_SIZE_4096); } else { switch (hrq->entry_count) { default: From dc8470f3c831c93ffcda17612e8e58a73210b88b Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 19 Jan 2017 11:21:29 +0800 Subject: [PATCH 151/154] ceph: update readpages osd request according to size of pages commit d641df819db8b80198fd85d9de91137e8a823b07 upstream. add_to_page_cache_lru() can fails, so the actual pages to read can be smaller than the initial size of osd request. We need to update osd request size in that case. Signed-off-by: Yan, Zheng Reviewed-by: Jeff Layton Signed-off-by: Greg Kroah-Hartman --- fs/ceph/addr.c | 1 + net/ceph/osd_client.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index ef3ebd780aff..1e643c718917 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -363,6 +363,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) nr_pages = i; if (nr_pages > 0) { len = nr_pages << PAGE_SHIFT; + osd_req_op_extent_update(req, 0, len); break; } goto out_pages; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index e6ae15bc41b7..0ffeb60cfe67 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -672,7 +672,8 @@ void osd_req_op_extent_update(struct ceph_osd_request *osd_req, BUG_ON(length > previous); op->extent.length = length; - op->indata_len -= previous - length; + if (op->op == CEPH_OSD_OP_WRITE || op->op == CEPH_OSD_OP_WRITEFULL) + op->indata_len -= previous - length; } EXPORT_SYMBOL(osd_req_op_extent_update); From 5f7ff59d067e1888fec91fccefdd84f3987b605e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 16 Jan 2017 18:24:56 +0100 Subject: [PATCH 152/154] netfilter: conntrack: remove GC_MAX_EVICTS break commit 524b698db06b9b6da7192e749f637904e2f62d7b upstream. Instead of breaking loop and instant resched, don't bother checking this in first place (the loop calls cond_resched for every bucket anyway). Suggested-by: Nicolas Dichtel Signed-off-by: Florian Westphal Acked-by: Nicolas Dichtel Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_conntrack_core.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0f87e5d21be7..f8b142c66e6a 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -88,8 +88,6 @@ static __read_mostly bool nf_conntrack_locks_all; #define GC_MAX_BUCKETS_DIV 64u /* upper bound of scan intervals */ #define GC_INTERVAL_MAX (2 * HZ) -/* maximum conntracks to evict per gc run */ -#define GC_MAX_EVICTS 256u static struct conntrack_gc_work conntrack_gc_work; @@ -979,8 +977,7 @@ static void gc_worker(struct work_struct *work) */ rcu_read_unlock(); cond_resched_rcu_qs(); - } while (++buckets < goal && - expired_count < GC_MAX_EVICTS); + } while (++buckets < goal); if (gc_work->exiting) return; @@ -1005,7 +1002,7 @@ static void gc_worker(struct work_struct *work) * In case we have lots of evictions next scan is done immediately. */ ratio = scanned ? expired_count * 100 / scanned : 0; - if (ratio >= 90 || expired_count == GC_MAX_EVICTS) { + if (ratio >= 90) { gc_work->next_gc_run = 0; next_run = 0; } else if (expired_count) { From 371d0342a39710e45e6f7c316ca7786b3e34d90f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Jan 2017 02:01:22 +0100 Subject: [PATCH 153/154] netfilter: conntrack: refine gc worker heuristics, redux commit e5072053b09642b8ff417d47da05b84720aea3ee upstream. This further refines the changes made to conntrack gc_worker in commit e0df8cae6c16 ("netfilter: conntrack: refine gc worker heuristics"). The main idea of that change was to reduce the scan interval when evictions take place. However, on the reporters' setup, there are 1-2 million conntrack entries in total and roughly 8k new (and closing) connections per second. In this case we'll always evict at least one entry per gc cycle and scan interval is always at 1 jiffy because of this test: } else if (expired_count) { gc_work->next_gc_run /= 2U; next_run = msecs_to_jiffies(1); being true almost all the time. Given we scan ~10k entries per run its clearly wrong to reduce interval based on nonzero eviction count, it will only waste cpu cycles since a vast majorities of conntracks are not timed out. Thus only look at the ratio (scanned entries vs. evicted entries) to make a decision on whether to reduce or not. Because evictor is supposed to only kick in when system turns idle after a busy period, pick a high ratio -- this makes it 50%. We thus keep the idea of increasing scan rate when its likely that table contains many expired entries. In order to not let timed-out entries hang around for too long (important when using event logging, in which case we want to timely destroy events), we now scan the full table within at most GC_MAX_SCAN_JIFFIES (16 seconds) even in worst-case scenario where all timed-out entries sit in same slot. I tested this with a vm under synflood (with sysctl net.netfilter.nf_conntrack_tcp_timeout_syn_recv=3). While flood is ongoing, interval now stays at its max rate (GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV -> 125ms). With feedback from Nicolas Dichtel. Reported-by: Denys Fedoryshchenko Cc: Nicolas Dichtel Fixes: b87a2f9199ea82eaadc ("netfilter: conntrack: add gc worker to remove timed-out entries") Signed-off-by: Florian Westphal Tested-by: Nicolas Dichtel Acked-by: Nicolas Dichtel Tested-by: Denys Fedoryshchenko Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_conntrack_core.c | 39 ++++++++++++++++--------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f8b142c66e6a..6bd150882ba4 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -85,9 +85,11 @@ static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; /* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */ -#define GC_MAX_BUCKETS_DIV 64u -/* upper bound of scan intervals */ -#define GC_INTERVAL_MAX (2 * HZ) +#define GC_MAX_BUCKETS_DIV 128u +/* upper bound of full table scan */ +#define GC_MAX_SCAN_JIFFIES (16u * HZ) +/* desired ratio of entries found to be expired */ +#define GC_EVICT_RATIO 50u static struct conntrack_gc_work conntrack_gc_work; @@ -936,6 +938,7 @@ static noinline int early_drop(struct net *net, unsigned int _hash) static void gc_worker(struct work_struct *work) { + unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u); unsigned int i, goal, buckets = 0, expired_count = 0; struct conntrack_gc_work *gc_work; unsigned int ratio, scanned = 0; @@ -994,27 +997,25 @@ static void gc_worker(struct work_struct *work) * 1. Minimize time until we notice a stale entry * 2. Maximize scan intervals to not waste cycles * - * Normally, expired_count will be 0, this increases the next_run time - * to priorize 2) above. + * Normally, expire ratio will be close to 0. * - * As soon as a timed-out entry is found, move towards 1) and increase - * the scan frequency. - * In case we have lots of evictions next scan is done immediately. + * As soon as a sizeable fraction of the entries have expired + * increase scan frequency. */ ratio = scanned ? expired_count * 100 / scanned : 0; - if (ratio >= 90) { - gc_work->next_gc_run = 0; - next_run = 0; - } else if (expired_count) { - gc_work->next_gc_run /= 2U; - next_run = msecs_to_jiffies(1); + if (ratio > GC_EVICT_RATIO) { + gc_work->next_gc_run = min_interval; } else { - if (gc_work->next_gc_run < GC_INTERVAL_MAX) - gc_work->next_gc_run += msecs_to_jiffies(1); + unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV; - next_run = gc_work->next_gc_run; + BUILD_BUG_ON((GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV) == 0); + + gc_work->next_gc_run += min_interval; + if (gc_work->next_gc_run > max) + gc_work->next_gc_run = max; } + next_run = gc_work->next_gc_run; gc_work->last_bucket = i; queue_delayed_work(system_long_wq, &gc_work->dwork, next_run); } @@ -1022,7 +1023,7 @@ static void gc_worker(struct work_struct *work) static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) { INIT_DELAYED_WORK(&gc_work->dwork, gc_worker); - gc_work->next_gc_run = GC_INTERVAL_MAX; + gc_work->next_gc_run = HZ; gc_work->exiting = false; } @@ -1915,7 +1916,7 @@ int nf_conntrack_init_start(void) nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); conntrack_gc_work_init(&conntrack_gc_work); - queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, GC_INTERVAL_MAX); + queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ); return 0; From d379ab2707cfa16e774e2698ff5e32a1f7b243b8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 12 Mar 2017 06:42:15 +0100 Subject: [PATCH 154/154] Linux 4.9.14 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 14dc2758345b..5e7706e94622 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 13 +SUBLEVEL = 14 EXTRAVERSION = NAME = Roaring Lionus