From a8af2054e98548e8515f5d7d5409bd50dcd77e82 Mon Sep 17 00:00:00 2001 From: Purna Chandra Mandal Date: Thu, 2 Jun 2016 14:51:42 +0530 Subject: [PATCH 001/357] MIPS: pic32mzda: Fix linker error for pic32_get_pbclk() commit a726f1d2dd4fee179aa4513176d688ad309de6cc upstream. Early clock API pic32_get_pbclk() is defined in early_clk.c and used by time.c and early_console.c. When CONFIG_EARLY_PRINTK isn't set, early_clk.c isn't compiled and time.c fails to link. Fix it by compiling early_clk.c always. Also sort files in alphabetical order. Fixes: 6e4ad1b41360 ("MIPS: pic32mzda: fix getting timer clock rate.") Reported-by: Harvey Hunt Signed-off-by: Purna Chandra Mandal Reviewed-by: Harvey Hunt Cc: Ralf Baechle Cc: Joshua Henderson Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/13383/ Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/pic32/pic32mzda/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/mips/pic32/pic32mzda/Makefile b/arch/mips/pic32/pic32mzda/Makefile index 4a4c2728c027..c28649615c6c 100644 --- a/arch/mips/pic32/pic32mzda/Makefile +++ b/arch/mips/pic32/pic32mzda/Makefile @@ -2,8 +2,7 @@ # Joshua Henderson, # Copyright (C) 2015 Microchip Technology, Inc. All rights reserved. # -obj-y := init.o time.o config.o +obj-y := config.o early_clk.o init.o time.o obj-$(CONFIG_EARLY_PRINTK) += early_console.o \ - early_pin.o \ - early_clk.o + early_pin.o From ef674c5eb8da4ecb028a2a670b0bb1eec9eed004 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 26 Jan 2017 02:16:47 +0100 Subject: [PATCH 002/357] MIPS: Fix special case in 64 bit IP checksumming. commit 66fd848cadaa6be974a8c780fbeb328f0af4d3bd upstream. For certain arguments such as saddr = 0xc0a8fd60, daddr = 0xc0a8fda1, len = 80, proto = 17, sum = 0x7eae049d there will be a carry when folding the intermediate 64 bit checksum to 32 bit but the code doesn't add the carry back to the one's complement sum, thus an incorrect result will be generated. Reported-by: Mark Zhang Signed-off-by: Ralf Baechle Reviewed-by: James Hogan Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/checksum.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/include/asm/checksum.h b/arch/mips/include/asm/checksum.h index bce1ce53149a..0e231970653a 100644 --- a/arch/mips/include/asm/checksum.h +++ b/arch/mips/include/asm/checksum.h @@ -186,7 +186,9 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, " daddu %0, %4 \n" " dsll32 $1, %0, 0 \n" " daddu %0, $1 \n" + " sltu $1, %0, $1 \n" " dsra32 %0, %0, 0 \n" + " addu %0, $1 \n" #endif " .set pop" : "=r" (sum) From ef9e73be031aa4998617a6e11581904687132e5e Mon Sep 17 00:00:00 2001 From: Mirko Parthey Date: Wed, 15 Feb 2017 23:31:30 +0100 Subject: [PATCH 003/357] MIPS: BCM47XX: Fix button inversion for Asus WL-500W MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bdfdaf1a016ef09cb941f2edad485a713510b8d5 upstream. The Asus WL-500W buttons are active high, but the software treats them as active low. Fix the inverted logic. Fixes: 3be972556fa1 ("MIPS: BCM47XX: Import buttons database from OpenWrt") Signed-off-by: Mirko Parthey Acked-by: Rafał Miłecki Cc: Hauke Mehrtens Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/15295/ Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/bcm47xx/buttons.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/mips/bcm47xx/buttons.c b/arch/mips/bcm47xx/buttons.c index 52caa75bfe4e..e2f50d690624 100644 --- a/arch/mips/bcm47xx/buttons.c +++ b/arch/mips/bcm47xx/buttons.c @@ -17,6 +17,12 @@ .active_low = 1, \ } +#define BCM47XX_GPIO_KEY_H(_gpio, _code) \ + { \ + .code = _code, \ + .gpio = _gpio, \ + } + /* Asus */ static const struct gpio_keys_button @@ -79,8 +85,8 @@ bcm47xx_buttons_asus_wl500gpv2[] __initconst = { static const struct gpio_keys_button bcm47xx_buttons_asus_wl500w[] __initconst = { - BCM47XX_GPIO_KEY(6, KEY_RESTART), - BCM47XX_GPIO_KEY(7, KEY_WPS_BUTTON), + BCM47XX_GPIO_KEY_H(6, KEY_RESTART), + BCM47XX_GPIO_KEY_H(7, KEY_WPS_BUTTON), }; static const struct gpio_keys_button From 6f35f1fc14b6188483f993ca989de4b0c85a5b1a Mon Sep 17 00:00:00 2001 From: James Cowgill Date: Mon, 9 Jan 2017 16:52:28 +0000 Subject: [PATCH 004/357] MIPS: OCTEON: Fix copy_from_user fault handling for large buffers commit 884b426917e4b3c85f33b382c792a94305dfdd62 upstream. If copy_from_user is called with a large buffer (>= 128 bytes) and the userspace buffer refers partially to unreadable memory, then it is possible for Octeon's copy_from_user to report the wrong number of bytes have been copied. In the case where the buffer size is an exact multiple of 128 and the fault occurs in the last 64 bytes, copy_from_user will report that all the bytes were copied successfully but leave some garbage in the destination buffer. The bug is in the main __copy_user_common loop in octeon-memcpy.S where in the middle of the loop, src and dst are incremented by 128 bytes. The l_exc_copy fault handler is used after this but that assumes that "src < THREAD_BUADDR($28)". This is not the case if src has already been incremented. Fix by adding an extra fault handler which rewinds the src and dst pointers 128 bytes before falling though to l_exc_copy. Thanks to the pwritev test from the strace test suite for originally highlighting this bug! Fixes: 5b3b16880f40 ("MIPS: Add Cavium OCTEON processor support ...") Signed-off-by: James Cowgill Acked-by: David Daney Reviewed-by: James Hogan Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14978/ Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/cavium-octeon/octeon-memcpy.S | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/mips/cavium-octeon/octeon-memcpy.S b/arch/mips/cavium-octeon/octeon-memcpy.S index 64e08df51d65..8b7004132491 100644 --- a/arch/mips/cavium-octeon/octeon-memcpy.S +++ b/arch/mips/cavium-octeon/octeon-memcpy.S @@ -208,18 +208,18 @@ EXC( STORE t2, UNIT(6)(dst), s_exc_p10u) ADD src, src, 16*NBYTES EXC( STORE t3, UNIT(7)(dst), s_exc_p9u) ADD dst, dst, 16*NBYTES -EXC( LOAD t0, UNIT(-8)(src), l_exc_copy) -EXC( LOAD t1, UNIT(-7)(src), l_exc_copy) -EXC( LOAD t2, UNIT(-6)(src), l_exc_copy) -EXC( LOAD t3, UNIT(-5)(src), l_exc_copy) +EXC( LOAD t0, UNIT(-8)(src), l_exc_copy_rewind16) +EXC( LOAD t1, UNIT(-7)(src), l_exc_copy_rewind16) +EXC( LOAD t2, UNIT(-6)(src), l_exc_copy_rewind16) +EXC( LOAD t3, UNIT(-5)(src), l_exc_copy_rewind16) EXC( STORE t0, UNIT(-8)(dst), s_exc_p8u) EXC( STORE t1, UNIT(-7)(dst), s_exc_p7u) EXC( STORE t2, UNIT(-6)(dst), s_exc_p6u) EXC( STORE t3, UNIT(-5)(dst), s_exc_p5u) -EXC( LOAD t0, UNIT(-4)(src), l_exc_copy) -EXC( LOAD t1, UNIT(-3)(src), l_exc_copy) -EXC( LOAD t2, UNIT(-2)(src), l_exc_copy) -EXC( LOAD t3, UNIT(-1)(src), l_exc_copy) +EXC( LOAD t0, UNIT(-4)(src), l_exc_copy_rewind16) +EXC( LOAD t1, UNIT(-3)(src), l_exc_copy_rewind16) +EXC( LOAD t2, UNIT(-2)(src), l_exc_copy_rewind16) +EXC( LOAD t3, UNIT(-1)(src), l_exc_copy_rewind16) EXC( STORE t0, UNIT(-4)(dst), s_exc_p4u) EXC( STORE t1, UNIT(-3)(dst), s_exc_p3u) EXC( STORE t2, UNIT(-2)(dst), s_exc_p2u) @@ -383,6 +383,10 @@ done: nop END(memcpy) +l_exc_copy_rewind16: + /* Rewind src and dst by 16*NBYTES for l_exc_copy */ + SUB src, src, 16*NBYTES + SUB dst, dst, 16*NBYTES l_exc_copy: /* * Copy bytes from src until faulting load address (or until a From 3660e62cfce7a1db86fe330dadb92db59d0bc039 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 19 Jan 2017 14:20:09 +0100 Subject: [PATCH 005/357] MIPS: Lantiq: Keep ethernet enabled during boot commit 774f0c6419bb8f9d83901d33582c7fe3ba6a6cb3 upstream. Disabling ethernet during reboot (only to enable it again when the ethernet driver attaches) can put the chip into a faulty state where it corrupts the header of all incoming packets. This happens if packets arrive during the time window where the core is disabled, and it can be easily reproduced by rebooting while sending a flood ping to the broadcast address. Fixes: 95135bfa7ead ("MIPS: Lantiq: Deactivate most of the devices by default") Signed-off-by: Felix Fietkau Acked-by: John Crispin Cc: hauke.mehrtens@lantiq.com Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/15078/ Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/lantiq/xway/sysctrl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index 236193b5210b..9a61671c00a7 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -545,7 +545,7 @@ void __init ltq_soc_init(void) clkdev_add_pmu("1a800000.pcie", "msi", 1, 1, PMU1_PCIE2_MSI); clkdev_add_pmu("1a800000.pcie", "pdi", 1, 1, PMU1_PCIE2_PDI); clkdev_add_pmu("1a800000.pcie", "ctl", 1, 1, PMU1_PCIE2_CTL); - clkdev_add_pmu("1e108000.eth", NULL, 1, 0, PMU_SWITCH | PMU_PPE_DP); + clkdev_add_pmu("1e108000.eth", NULL, 0, 0, PMU_SWITCH | PMU_PPE_DP); clkdev_add_pmu("1da00000.usif", "NULL", 1, 0, PMU_USIF); clkdev_add_pmu("1e103100.deu", NULL, 1, 0, PMU_DEU); } else if (of_machine_is_compatible("lantiq,ar10")) { @@ -553,7 +553,7 @@ void __init ltq_soc_init(void) ltq_ar10_fpi_hz(), ltq_ar10_pp32_hz()); clkdev_add_pmu("1e101000.usb", "ctl", 1, 0, PMU_USB0); clkdev_add_pmu("1e106000.usb", "ctl", 1, 0, PMU_USB1); - clkdev_add_pmu("1e108000.eth", NULL, 1, 0, PMU_SWITCH | + clkdev_add_pmu("1e108000.eth", NULL, 0, 0, PMU_SWITCH | PMU_PPE_DP | PMU_PPE_TC); clkdev_add_pmu("1da00000.usif", "NULL", 1, 0, PMU_USIF); clkdev_add_pmu("1f203000.rcu", "gphy", 1, 0, PMU_GPHY); @@ -575,11 +575,11 @@ void __init ltq_soc_init(void) clkdev_add_pmu(NULL, "ahb", 1, 0, PMU_AHBM | PMU_AHBS); clkdev_add_pmu("1da00000.usif", "NULL", 1, 0, PMU_USIF); - clkdev_add_pmu("1e108000.eth", NULL, 1, 0, + clkdev_add_pmu("1e108000.eth", NULL, 0, 0, PMU_SWITCH | PMU_PPE_DPLUS | PMU_PPE_DPLUM | PMU_PPE_EMA | PMU_PPE_TC | PMU_PPE_SLL01 | PMU_PPE_QSB | PMU_PPE_TOP); - clkdev_add_pmu("1f203000.rcu", "gphy", 1, 0, PMU_GPHY); + clkdev_add_pmu("1f203000.rcu", "gphy", 0, 0, PMU_GPHY); clkdev_add_pmu("1e103000.sdio", NULL, 1, 0, PMU_SDIO); clkdev_add_pmu("1e103100.deu", NULL, 1, 0, PMU_DEU); clkdev_add_pmu("1e116000.mei", "dfe", 1, 0, PMU_DFE); From d0eae5bbd1c3361659c3c0e349c14123f0aaba81 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 7 Nov 2016 15:07:02 +0000 Subject: [PATCH 006/357] MIPS: Clear ISA bit correctly in get_frame_info() commit ccaf7caf2c73c6db920772bf08bf1d47b2170634 upstream. get_frame_info() can be called in microMIPS kernels with the ISA bit already clear. For example this happens when unwind_stack_by_address() is called because we begin with a PC that has the ISA bit set & subtract the (odd) offset from the preceding symbol (which does not have the ISA bit set). Since get_frame_info() unconditionally subtracts 1 from the PC in microMIPS kernels it incorrectly misaligns the address it then attempts to access code at, leading to an address error exception. Fix this by using msk_isa16_mode() to clear the ISA bit, which allows get_frame_info() to function regardless of whether it is provided with a PC that has the ISA bit set or not. Signed-off-by: Paul Burton Fixes: 34c2f668d0f6 ("MIPS: microMIPS: Add unaligned access support.") Cc: Leonid Yegoshin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14528/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/process.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 9514e5f2209f..c0aa99b2e314 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -303,17 +303,14 @@ static inline int is_sp_move_ins(union mips_instruction *ip) static int get_frame_info(struct mips_frame_info *info) { -#ifdef CONFIG_CPU_MICROMIPS - union mips_instruction *ip = (void *) (((char *) info->func) - 1); -#else - union mips_instruction *ip = info->func; -#endif + union mips_instruction *ip; unsigned max_insns = info->func_size / sizeof(union mips_instruction); unsigned i; info->pc_offset = -1; info->frame_size = 0; + ip = (void *)msk_isa16_mode((ulong)info->func); if (!ip) goto err; From ce449cbdcff78a383741bc79d66f2779a556735b Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 7 Nov 2016 15:07:03 +0000 Subject: [PATCH 007/357] MIPS: Prevent unaligned accesses during stack unwinding commit a3552dace7d1d0cabf573e88fc3025cb90c4a601 upstream. During stack unwinding we call a number of functions to determine what type of instruction we're looking at. The union mips_instruction pointer provided to them may be pointing at a 2 byte, but not 4 byte, aligned address & we thus cannot directly access the 4 byte wide members of the union mips_instruction. To avoid this is_ra_save_ins() copies the required half-words of the microMIPS instruction to a correctly aligned union mips_instruction on the stack, which it can then access safely. The is_jump_ins() & is_sp_move_ins() functions do not correctly perform this temporary copy, and instead attempt to directly dereference 4 byte fields which may be misaligned and lead to an address exception. Fix this by copying the instruction halfwords to a temporary union mips_instruction in get_frame_info() such that we can provide a 4 byte aligned union mips_instruction to the is_*_ins() functions and they do not need to deal with misalignment themselves. Signed-off-by: Paul Burton Fixes: 34c2f668d0f6 ("MIPS: microMIPS: Add unaligned access support.") Cc: Leonid Yegoshin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14529/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/process.c | 70 +++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index c0aa99b2e314..941a88191f0e 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -198,8 +198,6 @@ struct mips_frame_info { static inline int is_ra_save_ins(union mips_instruction *ip) { #ifdef CONFIG_CPU_MICROMIPS - union mips_instruction mmi; - /* * swsp ra,offset * swm16 reglist,offset(sp) @@ -209,23 +207,20 @@ static inline int is_ra_save_ins(union mips_instruction *ip) * * microMIPS is way more fun... */ - if (mm_insn_16bit(ip->halfword[0])) { - mmi.word = (ip->halfword[0] << 16); - return (mmi.mm16_r5_format.opcode == mm_swsp16_op && - mmi.mm16_r5_format.rt == 31) || - (mmi.mm16_m_format.opcode == mm_pool16c_op && - mmi.mm16_m_format.func == mm_swm16_op); + if (mm_insn_16bit(ip->halfword[1])) { + return (ip->mm16_r5_format.opcode == mm_swsp16_op && + ip->mm16_r5_format.rt == 31) || + (ip->mm16_m_format.opcode == mm_pool16c_op && + ip->mm16_m_format.func == mm_swm16_op); } else { - mmi.halfword[0] = ip->halfword[1]; - mmi.halfword[1] = ip->halfword[0]; - return (mmi.mm_m_format.opcode == mm_pool32b_op && - mmi.mm_m_format.rd > 9 && - mmi.mm_m_format.base == 29 && - mmi.mm_m_format.func == mm_swm32_func) || - (mmi.i_format.opcode == mm_sw32_op && - mmi.i_format.rs == 29 && - mmi.i_format.rt == 31); + return (ip->mm_m_format.opcode == mm_pool32b_op && + ip->mm_m_format.rd > 9 && + ip->mm_m_format.base == 29 && + ip->mm_m_format.func == mm_swm32_func) || + (ip->i_format.opcode == mm_sw32_op && + ip->i_format.rs == 29 && + ip->i_format.rt == 31); } #else /* sw / sd $ra, offset($sp) */ @@ -246,12 +241,8 @@ static inline int is_jump_ins(union mips_instruction *ip) * * microMIPS is kind of more fun... */ - union mips_instruction mmi; - - mmi.word = (ip->halfword[0] << 16); - - if ((mmi.mm16_r5_format.opcode == mm_pool16c_op && - (mmi.mm16_r5_format.rt & mm_jr16_op) == mm_jr16_op) || + if ((ip->mm16_r5_format.opcode == mm_pool16c_op && + (ip->mm16_r5_format.rt & mm_jr16_op) == mm_jr16_op) || ip->j_format.opcode == mm_jal32_op) return 1; if (ip->r_format.opcode != mm_pool32a_op || @@ -280,15 +271,13 @@ static inline int is_sp_move_ins(union mips_instruction *ip) * * microMIPS is not more fun... */ - if (mm_insn_16bit(ip->halfword[0])) { - union mips_instruction mmi; - - mmi.word = (ip->halfword[0] << 16); - return (mmi.mm16_r3_format.opcode == mm_pool16d_op && - mmi.mm16_r3_format.simmediate && mm_addiusp_func) || - (mmi.mm16_r5_format.opcode == mm_pool16d_op && - mmi.mm16_r5_format.rt == 29); + if (mm_insn_16bit(ip->halfword[1])) { + return (ip->mm16_r3_format.opcode == mm_pool16d_op && + ip->mm16_r3_format.simmediate && mm_addiusp_func) || + (ip->mm16_r5_format.opcode == mm_pool16d_op && + ip->mm16_r5_format.rt == 29); } + return ip->mm_i_format.opcode == mm_addiu32_op && ip->mm_i_format.rt == 29 && ip->mm_i_format.rs == 29; #else @@ -303,7 +292,8 @@ static inline int is_sp_move_ins(union mips_instruction *ip) static int get_frame_info(struct mips_frame_info *info) { - union mips_instruction *ip; + bool is_mmips = IS_ENABLED(CONFIG_CPU_MICROMIPS); + union mips_instruction insn, *ip; unsigned max_insns = info->func_size / sizeof(union mips_instruction); unsigned i; @@ -319,11 +309,21 @@ static int get_frame_info(struct mips_frame_info *info) max_insns = min(128U, max_insns); for (i = 0; i < max_insns; i++, ip++) { + if (is_mmips && mm_insn_16bit(ip->halfword[0])) { + insn.halfword[0] = 0; + insn.halfword[1] = ip->halfword[0]; + } else if (is_mmips) { + insn.halfword[0] = ip->halfword[1]; + insn.halfword[1] = ip->halfword[0]; + } else { + insn.word = ip->word; + } - if (is_jump_ins(ip)) + if (is_jump_ins(&insn)) break; + if (!info->frame_size) { - if (is_sp_move_ins(ip)) + if (is_sp_move_ins(&insn)) { #ifdef CONFIG_CPU_MICROMIPS if (mm_insn_16bit(ip->halfword[0])) @@ -346,7 +346,7 @@ static int get_frame_info(struct mips_frame_info *info) } continue; } - if (info->pc_offset == -1 && is_ra_save_ins(ip)) { + if (info->pc_offset == -1 && is_ra_save_ins(&insn)) { info->pc_offset = ip->i_format.simmediate / sizeof(long); break; From b0b4eb58c5efe31c5dbd8fc771b43bb13bf84430 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 7 Nov 2016 15:07:04 +0000 Subject: [PATCH 008/357] MIPS: Fix get_frame_info() handling of microMIPS function size commit b6c7a324df37bf05ef7a2c1580683cf10d082d97 upstream. get_frame_info() is meant to iterate over up to the first 128 instructions within a function, but for microMIPS kernels it will not reach that many instructions unless the function is 512 bytes long since we calculate the maximum number of instructions to check by dividing the function length by the 4 byte size of a union mips_instruction. In microMIPS kernels this won't do since instructions are variable length. Fix this by instead checking whether the pointer to the current instruction has reached the end of the function, and use max_insns as a simple constant to check the number of iterations against. Signed-off-by: Paul Burton Fixes: 34c2f668d0f6 ("MIPS: microMIPS: Add unaligned access support.") Cc: Leonid Yegoshin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14530/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/process.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 941a88191f0e..d56ef4ba0a84 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -293,9 +293,9 @@ static inline int is_sp_move_ins(union mips_instruction *ip) static int get_frame_info(struct mips_frame_info *info) { bool is_mmips = IS_ENABLED(CONFIG_CPU_MICROMIPS); - union mips_instruction insn, *ip; - unsigned max_insns = info->func_size / sizeof(union mips_instruction); - unsigned i; + union mips_instruction insn, *ip, *ip_end; + const unsigned int max_insns = 128; + unsigned int i; info->pc_offset = -1; info->frame_size = 0; @@ -304,11 +304,9 @@ static int get_frame_info(struct mips_frame_info *info) if (!ip) goto err; - if (max_insns == 0) - max_insns = 128U; /* unknown function size */ - max_insns = min(128U, max_insns); + ip_end = (void *)ip + info->func_size; - for (i = 0; i < max_insns; i++, ip++) { + for (i = 0; i < max_insns && ip < ip_end; i++, ip++) { if (is_mmips && mm_insn_16bit(ip->halfword[0])) { insn.halfword[0] = 0; insn.halfword[1] = ip->halfword[0]; From b14e085086245a829a87f6cc44c996333c641390 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 7 Nov 2016 15:07:05 +0000 Subject: [PATCH 009/357] MIPS: Fix is_jump_ins() handling of 16b microMIPS instructions commit 67c75057709a6d85c681c78b9b2f9b71191f01a2 upstream. is_jump_ins() checks 16b instruction fields without verifying that the instruction is indeed 16b, as is done by is_ra_save_ins() & is_sp_move_ins(). Add the appropriate check. Signed-off-by: Paul Burton Fixes: 34c2f668d0f6 ("MIPS: microMIPS: Add unaligned access support.") Cc: Leonid Yegoshin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14531/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/process.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index d56ef4ba0a84..298dc33737b0 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -241,9 +241,14 @@ static inline int is_jump_ins(union mips_instruction *ip) * * microMIPS is kind of more fun... */ - if ((ip->mm16_r5_format.opcode == mm_pool16c_op && - (ip->mm16_r5_format.rt & mm_jr16_op) == mm_jr16_op) || - ip->j_format.opcode == mm_jal32_op) + if (mm_insn_16bit(ip->halfword[1])) { + if ((ip->mm16_r5_format.opcode == mm_pool16c_op && + (ip->mm16_r5_format.rt & mm_jr16_op) == mm_jr16_op)) + return 1; + return 0; + } + + if (ip->j_format.opcode == mm_jal32_op) return 1; if (ip->r_format.opcode != mm_pool32a_op || ip->r_format.func != mm_pool32axf_op) From 209ad1941daafab4255990c3b885cf184a05d72e Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 7 Nov 2016 15:07:06 +0000 Subject: [PATCH 010/357] MIPS: Calculate microMIPS ra properly when unwinding the stack commit bb9bc4689b9c635714fbcd5d335bad9934a7ebfc upstream. get_frame_info() calculates the offset of the return address within a stack frame simply by dividing a the bottom 16 bits of the instruction, treated as a signed integer, by the size of a long. Whilst this works for MIPS32 & MIPS64 ISAs where the sw or sd instructions are used, it's incorrect for microMIPS where encodings differ. The result is that we typically completely fail to unwind the stack on microMIPS. Fix this by adjusting is_ra_save_ins() to calculate the return address offset, and take into account the various different encodings there in the same place as we consider whether an instruction is storing the ra/$31 register. With this we are now able to unwind the stack for kernels targetting the microMIPS ISA, for example we can produce: Call Trace: [<80109e1f>] show_stack+0x63/0x7c [<8011ea17>] __warn+0x9b/0xac [<8011ea45>] warn_slowpath_fmt+0x1d/0x20 [<8013fe53>] register_console+0x43/0x314 [<8067c58d>] of_setup_earlycon+0x1dd/0x1ec [<8067f63f>] early_init_dt_scan_chosen_stdout+0xe7/0xf8 [<8066c115>] do_early_param+0x75/0xac [<801302f9>] parse_args+0x1dd/0x308 [<8066c459>] parse_early_options+0x25/0x28 [<8066c48b>] parse_early_param+0x2f/0x38 [<8066e8cf>] setup_arch+0x113/0x488 [<8066c4f3>] start_kernel+0x57/0x328 ---[ end trace 0000000000000000 ]--- Whereas previously we only produced: Call Trace: [<80109e1f>] show_stack+0x63/0x7c ---[ end trace 0000000000000000 ]--- Signed-off-by: Paul Burton Fixes: 34c2f668d0f6 ("MIPS: microMIPS: Add unaligned access support.") Cc: Leonid Yegoshin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14532/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/process.c | 83 +++++++++++++++++++++++++++++--------- 1 file changed, 63 insertions(+), 20 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 298dc33737b0..39c946fce939 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -195,7 +195,7 @@ struct mips_frame_info { #define J_TARGET(pc,target) \ (((unsigned long)(pc) & 0xf0000000) | ((target) << 2)) -static inline int is_ra_save_ins(union mips_instruction *ip) +static inline int is_ra_save_ins(union mips_instruction *ip, int *poff) { #ifdef CONFIG_CPU_MICROMIPS /* @@ -208,25 +208,70 @@ static inline int is_ra_save_ins(union mips_instruction *ip) * microMIPS is way more fun... */ if (mm_insn_16bit(ip->halfword[1])) { - return (ip->mm16_r5_format.opcode == mm_swsp16_op && - ip->mm16_r5_format.rt == 31) || - (ip->mm16_m_format.opcode == mm_pool16c_op && - ip->mm16_m_format.func == mm_swm16_op); + switch (ip->mm16_r5_format.opcode) { + case mm_swsp16_op: + if (ip->mm16_r5_format.rt != 31) + return 0; + + *poff = ip->mm16_r5_format.simmediate; + *poff = (*poff << 2) / sizeof(ulong); + return 1; + + case mm_pool16c_op: + switch (ip->mm16_m_format.func) { + case mm_swm16_op: + *poff = ip->mm16_m_format.imm; + *poff += 1 + ip->mm16_m_format.rlist; + *poff = (*poff << 2) / sizeof(ulong); + return 1; + + default: + return 0; + } + + default: + return 0; + } } - else { - return (ip->mm_m_format.opcode == mm_pool32b_op && - ip->mm_m_format.rd > 9 && - ip->mm_m_format.base == 29 && - ip->mm_m_format.func == mm_swm32_func) || - (ip->i_format.opcode == mm_sw32_op && - ip->i_format.rs == 29 && - ip->i_format.rt == 31); + + switch (ip->i_format.opcode) { + case mm_sw32_op: + if (ip->i_format.rs != 29) + return 0; + if (ip->i_format.rt != 31) + return 0; + + *poff = ip->i_format.simmediate / sizeof(ulong); + return 1; + + case mm_pool32b_op: + switch (ip->mm_m_format.func) { + case mm_swm32_func: + if (ip->mm_m_format.rd < 0x10) + return 0; + if (ip->mm_m_format.base != 29) + return 0; + + *poff = ip->mm_m_format.simmediate; + *poff += (ip->mm_m_format.rd & 0xf) * sizeof(u32); + *poff /= sizeof(ulong); + return 1; + default: + return 0; + } + + default: + return 0; } #else /* sw / sd $ra, offset($sp) */ - return (ip->i_format.opcode == sw_op || ip->i_format.opcode == sd_op) && - ip->i_format.rs == 29 && - ip->i_format.rt == 31; + if ((ip->i_format.opcode == sw_op || ip->i_format.opcode == sd_op) && + ip->i_format.rs == 29 && ip->i_format.rt == 31) { + *poff = ip->i_format.simmediate / sizeof(ulong); + return 1; + } + + return 0; #endif } @@ -349,11 +394,9 @@ static int get_frame_info(struct mips_frame_info *info) } continue; } - if (info->pc_offset == -1 && is_ra_save_ins(&insn)) { - info->pc_offset = - ip->i_format.simmediate / sizeof(long); + if (info->pc_offset == -1 && + is_ra_save_ins(&insn, &info->pc_offset)) break; - } } if (info->frame_size && info->pc_offset >= 0) /* nested */ return 0; From 8d06cbd365e1fccb5b84bb8146d9bf30c2a55f4a Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 7 Nov 2016 15:07:07 +0000 Subject: [PATCH 011/357] MIPS: Handle microMIPS jumps in the same way as MIPS32/MIPS64 jumps commit 096a0de427ea333f56f0ee00328cff2a2731bcf1 upstream. is_jump_ins() checks for plain jump ("j") instructions since commit e7438c4b893e ("MIPS: Fix sibling call handling in get_frame_info") but that commit didn't make the same change to the microMIPS code, leaving it inconsistent with the MIPS32/MIPS64 code. Handle the microMIPS encoding of the jump instruction too such that it behaves consistently. Signed-off-by: Paul Burton Fixes: e7438c4b893e ("MIPS: Fix sibling call handling in get_frame_info") Cc: Tony Wu Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14533/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/process.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 39c946fce939..1652f36acad1 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -293,6 +293,8 @@ static inline int is_jump_ins(union mips_instruction *ip) return 0; } + if (ip->j_format.opcode == mm_j32_op) + return 1; if (ip->j_format.opcode == mm_jal32_op) return 1; if (ip->r_format.opcode != mm_pool32a_op || From 80bbadbc42f912b43fd6664e834ba16c9dd28889 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 18 Jan 2017 17:46:18 +0800 Subject: [PATCH 012/357] mmc: sdhci-acpi: support deferred probe commit e28d6f048799acb0014491e6b74e580d84bd7916 upstream. With commit 67bf5156edc4 ("gpio / ACPI: fix returned error from acpi_dev_gpio_irq_get()"), mmc_gpiod_request_cd() returns -EPROBE_DEFER if GPIO is not ready when sdhci-acpi driver is probed, and sdhci-acpi driver should be probed again later in this case. This fixes an order issue when both GPIO and sdhci-acpi drivers are built as modules. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=177101 Tested-by: Jonas Aaberg Signed-off-by: Zhang Rui Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-acpi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index fddd0be196f4..80918abfc468 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -466,7 +466,10 @@ static int sdhci_acpi_probe(struct platform_device *pdev) if (sdhci_acpi_flag(c, SDHCI_ACPI_SD_CD)) { bool v = sdhci_acpi_flag(c, SDHCI_ACPI_SD_CD_OVERRIDE_LEVEL); - if (mmc_gpiod_request_cd(host->mmc, NULL, 0, v, 0, NULL)) { + err = mmc_gpiod_request_cd(host->mmc, NULL, 0, v, 0, NULL); + if (err) { + if (err == -EPROBE_DEFER) + goto err_free; dev_warn(dev, "failed to setup card detect gpio\n"); c->use_runtime_pm = false; } From d6b88a09cc2291aa3fac1e6eefd479398c1d8793 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Tue, 27 Dec 2016 16:02:36 -0200 Subject: [PATCH 013/357] am437x-vpfe: always assign bpp variable commit 6ebf75774f823ddbdbd10921006989d4df222f4a upstream. In vpfe_s_fmt(), when the sensor format and the requested format were the same, bpp was assigned to vpfe->bpp without being initialized first. Grab the bpp value that is currently used by using __vpfe_get_format() instead of its wrapper, vpfe_try_fmt(). This use of uninitialized variable has been found by compiling the kernel with clang. Fixes: 417d2e507edc ("[media] media: platform: add VPFE capture driver support for AM437X") Signed-off-by: Nicolas Iooss Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/am437x/am437x-vpfe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/am437x/am437x-vpfe.c b/drivers/media/platform/am437x/am437x-vpfe.c index b33b9e35e60e..05489a401c5c 100644 --- a/drivers/media/platform/am437x/am437x-vpfe.c +++ b/drivers/media/platform/am437x/am437x-vpfe.c @@ -1576,7 +1576,7 @@ static int vpfe_s_fmt(struct file *file, void *priv, return -EBUSY; } - ret = vpfe_try_fmt(file, priv, &format); + ret = __vpfe_get_format(vpfe, &format, &bpp); if (ret) return ret; From 02789ccd59ec10fe407cf5803e4fb081a5c14fa7 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Mon, 12 Dec 2016 09:16:51 -0200 Subject: [PATCH 014/357] uvcvideo: Fix a wrong macro commit 17c341ec0115837a610b2da15e32546e26068234 upstream. Don't mix up UVC_BUF_STATE_* and VB2_BUF_STATE_* codes. Fixes: 6998b6fb4b1c ("[media] uvcvideo: Use videobuf2-vmalloc") Signed-off-by: Guennadi Liakhovetski Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/uvc/uvc_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/uvc/uvc_queue.c b/drivers/media/usb/uvc/uvc_queue.c index 77edd206d345..40e5a6b54955 100644 --- a/drivers/media/usb/uvc/uvc_queue.c +++ b/drivers/media/usb/uvc/uvc_queue.c @@ -412,7 +412,7 @@ struct uvc_buffer *uvc_queue_next_buffer(struct uvc_video_queue *queue, nextbuf = NULL; spin_unlock_irqrestore(&queue->irqlock, flags); - buf->state = buf->error ? VB2_BUF_STATE_ERROR : UVC_BUF_STATE_DONE; + buf->state = buf->error ? UVC_BUF_STATE_ERROR : UVC_BUF_STATE_DONE; vb2_set_plane_payload(&buf->buf.vb2_buf, 0, buf->bytesused); vb2_buffer_done(&buf->buf.vb2_buf, VB2_BUF_STATE_DONE); From 7e5b7798d0c8021d5d9b7a9e7bd404c6bcbbd011 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 7 Jan 2017 23:08:49 -0200 Subject: [PATCH 015/357] media: fix dm1105.c build error commit e3bb3cddd177550d63a3e4909cf1a7782f13414d upstream. Fix dm1105 build error when CONFIG_I2C_ALGOBIT=m and CONFIG_DVB_DM1105=y. drivers/built-in.o: In function `dm1105_probe': dm1105.c:(.text+0x2836e7): undefined reference to `i2c_bit_add_bus' Signed-off-by: Randy Dunlap Reported-by: kbuild test robot Cc: Javier Martinez Canillas Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/pci/dm1105/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/pci/dm1105/Kconfig b/drivers/media/pci/dm1105/Kconfig index 173daf0c0847..14fa7e40f2a6 100644 --- a/drivers/media/pci/dm1105/Kconfig +++ b/drivers/media/pci/dm1105/Kconfig @@ -1,6 +1,6 @@ config DVB_DM1105 tristate "SDMC DM1105 based PCI cards" - depends on DVB_CORE && PCI && I2C + depends on DVB_CORE && PCI && I2C && I2C_ALGOBIT select DVB_PLL if MEDIA_SUBDRV_AUTOSELECT select DVB_STV0299 if MEDIA_SUBDRV_AUTOSELECT select DVB_STV0288 if MEDIA_SUBDRV_AUTOSELECT From 238442c2b535ef39be70f54904a8882dfd0349ca Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Mon, 16 Jan 2017 19:27:41 -0200 Subject: [PATCH 016/357] cxd2820r: fix gpio null pointer dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0ffb94b6cc5df6376ab6bff5b80075641f6716f8 upstream. Setting GPIOs during probe causes null pointer deference when GPIOLIB was not selected by Kconfig. Initialize driver private field before calling set gpios. It is regressing bug since 4.9. Fixes: 07fdf7d9f19f ("[media] cxd2820r: add I2C driver bindings") Reported-by: Chris Rankin Tested-by: Chris Rankin Tested-by: Håkan Lennestål Signed-off-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-frontends/cxd2820r_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/cxd2820r_core.c b/drivers/media/dvb-frontends/cxd2820r_core.c index 95267c6edb3a..f6ebbb47b9b2 100644 --- a/drivers/media/dvb-frontends/cxd2820r_core.c +++ b/drivers/media/dvb-frontends/cxd2820r_core.c @@ -615,6 +615,7 @@ static int cxd2820r_probe(struct i2c_client *client, } priv->client[0] = client; + priv->fe.demodulator_priv = priv; priv->i2c = client->adapter; priv->ts_mode = pdata->ts_mode; priv->ts_clk_inv = pdata->ts_clk_inv; @@ -697,7 +698,6 @@ static int cxd2820r_probe(struct i2c_client *client, memcpy(&priv->fe.ops, &cxd2820r_ops, sizeof(priv->fe.ops)); if (!pdata->attach_in_use) priv->fe.ops.release = NULL; - priv->fe.demodulator_priv = priv; i2c_set_clientdata(client, priv); /* Setup callbacks */ From ce1e60b492144b1d5ae0f67b781459325de0e884 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Fri, 2 Dec 2016 15:16:08 -0200 Subject: [PATCH 017/357] lirc_dev: LIRC_{G,S}ET_REC_MODE do not work commit bd291208d7f5d6b2d6a033fee449a429230b06df upstream. Since "273b902 [media] lirc_dev: use LIRC_CAN_REC() define" these ioctls no longer work. Signed-off-by: Sean Young Reviewed-by: Andi Shyti Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/rc/lirc_dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index 91f9bb87ce68..6ebe89551961 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -589,7 +589,7 @@ long lirc_dev_fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg) result = put_user(ir->d.features, (__u32 __user *)arg); break; case LIRC_GET_REC_MODE: - if (LIRC_CAN_REC(ir->d.features)) { + if (!LIRC_CAN_REC(ir->d.features)) { result = -ENOTTY; break; } @@ -599,7 +599,7 @@ long lirc_dev_fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg) (__u32 __user *)arg); break; case LIRC_SET_REC_MODE: - if (LIRC_CAN_REC(ir->d.features)) { + if (!LIRC_CAN_REC(ir->d.features)) { result = -ENOTTY; break; } From 719f1765b02c6a65eff5db3765c87444a1156cb9 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 2 Jan 2017 08:32:47 -0200 Subject: [PATCH 018/357] media: Properly pass through media entity types in entity enumeration commit 98d85f3cb912fde14593ead54dea4c1a00b3966f upstream. When the functions replaced media entity types, the range which was allowed for the types was incorrect. This meant that media entity types for specific devices were not passed correctly to the userspace through MEDIA_IOC_ENUM_ENTITIES. Fix it. Fixes: commit b2cd27448b33 ("[media] media-device: map new functions into old types for legacy API") Reported-and-tested-by: Antti Laakso Signed-off-by: Sakari Ailus Acked-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/media-device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/media-device.c b/drivers/media/media-device.c index 2783531f9fc0..4462d8c69d57 100644 --- a/drivers/media/media-device.c +++ b/drivers/media/media-device.c @@ -130,7 +130,7 @@ static long media_device_enum_entities(struct media_device *mdev, * old range. */ if (ent->function < MEDIA_ENT_F_OLD_BASE || - ent->function > MEDIA_ENT_T_DEVNODE_UNKNOWN) { + ent->function > MEDIA_ENT_F_TUNER) { if (is_media_entity_v4l2_subdev(ent)) entd->type = MEDIA_ENT_F_V4L2_SUBDEV_UNKNOWN; else if (ent->function != MEDIA_ENT_F_IO_V4L) From da1e40237f8f3516581b534c484c236a79ccfd14 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 11 Jan 2017 21:50:46 -0500 Subject: [PATCH 019/357] ext4: fix deadlock between inline_data and ext4_expand_extra_isize_ea() commit c755e251357a0cee0679081f08c3f4ba797a8009 upstream. The xattr_sem deadlock problems fixed in commit 2e81a4eeedca: "ext4: avoid deadlock when expanding inode size" didn't include the use of xattr_sem in fs/ext4/inline.c. With the addition of project quota which added a new extra inode field, this exposed deadlocks in the inline_data code similar to the ones fixed by 2e81a4eeedca. The deadlock can be reproduced via: dmesg -n 7 mke2fs -t ext4 -O inline_data -Fq -I 256 /dev/vdc 32768 mount -t ext4 -o debug_want_extra_isize=24 /dev/vdc /vdc mkdir /vdc/a umount /vdc mount -t ext4 /dev/vdc /vdc echo foo > /vdc/a/foo and looks like this: [ 11.158815] [ 11.160276] ============================================= [ 11.161960] [ INFO: possible recursive locking detected ] [ 11.161960] 4.10.0-rc3-00015-g011b30a8a3cf #160 Tainted: G W [ 11.161960] --------------------------------------------- [ 11.161960] bash/2519 is trying to acquire lock: [ 11.161960] (&ei->xattr_sem){++++..}, at: [] ext4_expand_extra_isize_ea+0x3d/0x4cd [ 11.161960] [ 11.161960] but task is already holding lock: [ 11.161960] (&ei->xattr_sem){++++..}, at: [] ext4_try_add_inline_entry+0x3a/0x152 [ 11.161960] [ 11.161960] other info that might help us debug this: [ 11.161960] Possible unsafe locking scenario: [ 11.161960] [ 11.161960] CPU0 [ 11.161960] ---- [ 11.161960] lock(&ei->xattr_sem); [ 11.161960] lock(&ei->xattr_sem); [ 11.161960] [ 11.161960] *** DEADLOCK *** [ 11.161960] [ 11.161960] May be due to missing lock nesting notation [ 11.161960] [ 11.161960] 4 locks held by bash/2519: [ 11.161960] #0: (sb_writers#3){.+.+.+}, at: [] mnt_want_write+0x1e/0x3e [ 11.161960] #1: (&type->i_mutex_dir_key){++++++}, at: [] path_openat+0x338/0x67a [ 11.161960] #2: (jbd2_handle){++++..}, at: [] start_this_handle+0x582/0x622 [ 11.161960] #3: (&ei->xattr_sem){++++..}, at: [] ext4_try_add_inline_entry+0x3a/0x152 [ 11.161960] [ 11.161960] stack backtrace: [ 11.161960] CPU: 0 PID: 2519 Comm: bash Tainted: G W 4.10.0-rc3-00015-g011b30a8a3cf #160 [ 11.161960] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-1 04/01/2014 [ 11.161960] Call Trace: [ 11.161960] dump_stack+0x72/0xa3 [ 11.161960] __lock_acquire+0xb7c/0xcb9 [ 11.161960] ? kvm_clock_read+0x1f/0x29 [ 11.161960] ? __lock_is_held+0x36/0x66 [ 11.161960] ? __lock_is_held+0x36/0x66 [ 11.161960] lock_acquire+0x106/0x18a [ 11.161960] ? ext4_expand_extra_isize_ea+0x3d/0x4cd [ 11.161960] down_write+0x39/0x72 [ 11.161960] ? ext4_expand_extra_isize_ea+0x3d/0x4cd [ 11.161960] ext4_expand_extra_isize_ea+0x3d/0x4cd [ 11.161960] ? _raw_read_unlock+0x22/0x2c [ 11.161960] ? jbd2_journal_extend+0x1e2/0x262 [ 11.161960] ? __ext4_journal_get_write_access+0x3d/0x60 [ 11.161960] ext4_mark_inode_dirty+0x17d/0x26d [ 11.161960] ? ext4_add_dirent_to_inline.isra.12+0xa5/0xb2 [ 11.161960] ext4_add_dirent_to_inline.isra.12+0xa5/0xb2 [ 11.161960] ext4_try_add_inline_entry+0x69/0x152 [ 11.161960] ext4_add_entry+0xa3/0x848 [ 11.161960] ? __brelse+0x14/0x2f [ 11.161960] ? _raw_spin_unlock_irqrestore+0x44/0x4f [ 11.161960] ext4_add_nondir+0x17/0x5b [ 11.161960] ext4_create+0xcf/0x133 [ 11.161960] ? ext4_mknod+0x12f/0x12f [ 11.161960] lookup_open+0x39e/0x3fb [ 11.161960] ? __wake_up+0x1a/0x40 [ 11.161960] ? lock_acquire+0x11e/0x18a [ 11.161960] path_openat+0x35c/0x67a [ 11.161960] ? sched_clock_cpu+0xd7/0xf2 [ 11.161960] do_filp_open+0x36/0x7c [ 11.161960] ? _raw_spin_unlock+0x22/0x2c [ 11.161960] ? __alloc_fd+0x169/0x173 [ 11.161960] do_sys_open+0x59/0xcc [ 11.161960] SyS_open+0x1d/0x1f [ 11.161960] do_int80_syscall_32+0x4f/0x61 [ 11.161960] entry_INT80_32+0x2f/0x2f [ 11.161960] EIP: 0xb76ad469 [ 11.161960] EFLAGS: 00000286 CPU: 0 [ 11.161960] EAX: ffffffda EBX: 08168ac8 ECX: 00008241 EDX: 000001b6 [ 11.161960] ESI: b75e46bc EDI: b7755000 EBP: bfbdb108 ESP: bfbdafc0 [ 11.161960] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b Reported-by: George Spelvin Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inline.c | 66 ++++++++++++++++++++++-------------------------- fs/ext4/xattr.c | 30 +++++++++------------- fs/ext4/xattr.h | 32 +++++++++++++++++++++++ 3 files changed, 74 insertions(+), 54 deletions(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index d8ca4b9f9dd6..028329721bbe 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -376,7 +376,7 @@ out: static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, unsigned int len) { - int ret, size; + int ret, size, no_expand; struct ext4_inode_info *ei = EXT4_I(inode); if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) @@ -386,15 +386,14 @@ static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, if (size < len) return -ENOSPC; - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); if (ei->i_inline_off) ret = ext4_update_inline_data(handle, inode, len); else ret = ext4_create_inline_data(handle, inode, len); - up_write(&EXT4_I(inode)->xattr_sem); - + ext4_write_unlock_xattr(inode, &no_expand); return ret; } @@ -523,7 +522,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping, struct inode *inode, unsigned flags) { - int ret, needed_blocks; + int ret, needed_blocks, no_expand; handle_t *handle = NULL; int retries = 0, sem_held = 0; struct page *page = NULL; @@ -563,7 +562,7 @@ retry: goto out; } - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); sem_held = 1; /* If some one has already done this for us, just exit. */ if (!ext4_has_inline_data(inode)) { @@ -600,7 +599,7 @@ retry: put_page(page); page = NULL; ext4_orphan_add(handle, inode); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); sem_held = 0; ext4_journal_stop(handle); handle = NULL; @@ -626,7 +625,7 @@ out: put_page(page); } if (sem_held) - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); if (handle) ext4_journal_stop(handle); brelse(iloc.bh); @@ -719,7 +718,7 @@ convert: int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied, struct page *page) { - int ret; + int ret, no_expand; void *kaddr; struct ext4_iloc iloc; @@ -737,7 +736,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, goto out; } - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); BUG_ON(!ext4_has_inline_data(inode)); kaddr = kmap_atomic(page); @@ -747,7 +746,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, /* clear page dirty so that writepages wouldn't work for us. */ ClearPageDirty(page); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); brelse(iloc.bh); out: return copied; @@ -758,7 +757,7 @@ ext4_journalled_write_inline_data(struct inode *inode, unsigned len, struct page *page) { - int ret; + int ret, no_expand; void *kaddr; struct ext4_iloc iloc; @@ -768,11 +767,11 @@ ext4_journalled_write_inline_data(struct inode *inode, return NULL; } - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); kaddr = kmap_atomic(page); ext4_write_inline_data(inode, &iloc, kaddr, 0, len); kunmap_atomic(kaddr); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); return iloc.bh; } @@ -1249,7 +1248,7 @@ out: int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, struct inode *dir, struct inode *inode) { - int ret, inline_size; + int ret, inline_size, no_expand; void *inline_start; struct ext4_iloc iloc; @@ -1257,7 +1256,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, if (ret) return ret; - down_write(&EXT4_I(dir)->xattr_sem); + ext4_write_lock_xattr(dir, &no_expand); if (!ext4_has_inline_data(dir)) goto out; @@ -1303,7 +1302,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, out: ext4_mark_inode_dirty(handle, dir); - up_write(&EXT4_I(dir)->xattr_sem); + ext4_write_unlock_xattr(dir, &no_expand); brelse(iloc.bh); return ret; } @@ -1663,7 +1662,7 @@ int ext4_delete_inline_entry(handle_t *handle, struct buffer_head *bh, int *has_inline_data) { - int err, inline_size; + int err, inline_size, no_expand; struct ext4_iloc iloc; void *inline_start; @@ -1671,7 +1670,7 @@ int ext4_delete_inline_entry(handle_t *handle, if (err) return err; - down_write(&EXT4_I(dir)->xattr_sem); + ext4_write_lock_xattr(dir, &no_expand); if (!ext4_has_inline_data(dir)) { *has_inline_data = 0; goto out; @@ -1705,7 +1704,7 @@ int ext4_delete_inline_entry(handle_t *handle, ext4_show_inline_dir(dir, iloc.bh, inline_start, inline_size); out: - up_write(&EXT4_I(dir)->xattr_sem); + ext4_write_unlock_xattr(dir, &no_expand); brelse(iloc.bh); if (err != -ENOENT) ext4_std_error(dir->i_sb, err); @@ -1804,11 +1803,11 @@ out: int ext4_destroy_inline_data(handle_t *handle, struct inode *inode) { - int ret; + int ret, no_expand; - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); ret = ext4_destroy_inline_data_nolock(handle, inode); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); return ret; } @@ -1893,7 +1892,7 @@ out: void ext4_inline_data_truncate(struct inode *inode, int *has_inline) { handle_t *handle; - int inline_size, value_len, needed_blocks; + int inline_size, value_len, needed_blocks, no_expand; size_t i_size; void *value = NULL; struct ext4_xattr_ibody_find is = { @@ -1910,7 +1909,7 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline) if (IS_ERR(handle)) return; - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); if (!ext4_has_inline_data(inode)) { *has_inline = 0; ext4_journal_stop(handle); @@ -1968,7 +1967,7 @@ out_error: up_write(&EXT4_I(inode)->i_data_sem); out: brelse(is.iloc.bh); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); kfree(value); if (inode->i_nlink) ext4_orphan_del(handle, inode); @@ -1984,7 +1983,7 @@ out: int ext4_convert_inline_data(struct inode *inode) { - int error, needed_blocks; + int error, needed_blocks, no_expand; handle_t *handle; struct ext4_iloc iloc; @@ -2006,15 +2005,10 @@ int ext4_convert_inline_data(struct inode *inode) goto out_free; } - down_write(&EXT4_I(inode)->xattr_sem); - if (!ext4_has_inline_data(inode)) { - up_write(&EXT4_I(inode)->xattr_sem); - goto out; - } - - error = ext4_convert_inline_data_nolock(handle, inode, &iloc); - up_write(&EXT4_I(inode)->xattr_sem); -out: + ext4_write_lock_xattr(inode, &no_expand); + if (ext4_has_inline_data(inode)) + error = ext4_convert_inline_data_nolock(handle, inode, &iloc); + ext4_write_unlock_xattr(inode, &no_expand); ext4_journal_stop(handle); out_free: brelse(iloc.bh); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index d77be9e9f535..4448ed37181b 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1174,16 +1174,14 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, struct ext4_xattr_block_find bs = { .s = { .not_found = -ENODATA, }, }; - unsigned long no_expand; + int no_expand; int error; if (!name) return -EINVAL; if (strlen(name) > 255) return -ERANGE; - down_write(&EXT4_I(inode)->xattr_sem); - no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); - ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); + ext4_write_lock_xattr(inode, &no_expand); error = ext4_reserve_inode_write(handle, inode, &is.iloc); if (error) @@ -1251,7 +1249,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, ext4_xattr_update_super_block(handle, inode->i_sb); inode->i_ctime = ext4_current_time(inode); if (!value) - ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); + no_expand = 0; error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); /* * The bh is consumed by ext4_mark_iloc_dirty, even with @@ -1265,9 +1263,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, cleanup: brelse(is.iloc.bh); brelse(bs.bh); - if (no_expand == 0) - ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); return error; } @@ -1484,12 +1480,11 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, int error = 0, tried_min_extra_isize = 0; int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); int isize_diff; /* How much do we need to grow i_extra_isize */ + int no_expand; + + if (ext4_write_trylock_xattr(inode, &no_expand) == 0) + return 0; - down_write(&EXT4_I(inode)->xattr_sem); - /* - * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty - */ - ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); retry: isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize; if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) @@ -1571,17 +1566,16 @@ shift: EXT4_I(inode)->i_extra_isize = new_extra_isize; brelse(bh); out: - ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); return 0; cleanup: brelse(bh); /* - * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode - * size expansion failed. + * Inode size expansion failed; don't try again */ - up_write(&EXT4_I(inode)->xattr_sem); + no_expand = 1; + ext4_write_unlock_xattr(inode, &no_expand); return error; } diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index a92e783fa057..099c8b670ef5 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -102,6 +102,38 @@ extern const struct xattr_handler ext4_xattr_security_handler; #define EXT4_XATTR_NAME_ENCRYPTION_CONTEXT "c" +/* + * The EXT4_STATE_NO_EXPAND is overloaded and used for two purposes. + * The first is to signal that there the inline xattrs and data are + * taking up so much space that we might as well not keep trying to + * expand it. The second is that xattr_sem is taken for writing, so + * we shouldn't try to recurse into the inode expansion. For this + * second case, we need to make sure that we take save and restore the + * NO_EXPAND state flag appropriately. + */ +static inline void ext4_write_lock_xattr(struct inode *inode, int *save) +{ + down_write(&EXT4_I(inode)->xattr_sem); + *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); + ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); +} + +static inline int ext4_write_trylock_xattr(struct inode *inode, int *save) +{ + if (down_write_trylock(&EXT4_I(inode)->xattr_sem) == 0) + return 0; + *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); + ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); + return 1; +} + +static inline void ext4_write_unlock_xattr(struct inode *inode, int *save) +{ + if (*save == 0) + ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); + up_write(&EXT4_I(inode)->xattr_sem); +} + extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); From 404950add4ce0bfb95346a8233f5daba7fad8a54 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Fri, 10 Feb 2017 11:20:19 +0900 Subject: [PATCH 020/357] spi: s3c64xx: fix inconsistency between binding and driver commit 379f831a927817c130a62e3ca0082ae685557324 upstream. Commit a92e7c3d82a1 ("spi: s3c64xx: consider the case when the CS line is not connected") introduced an inconsistency between the binding, where the disconnected CS line was marked as 'no-cs-readback', and the driver. The driver is erroneously checking for that attribute with property name of 'broken-cs'. Check for 'no-cs-readback' in the driver as well. Fixes: a92e7c3d82a1 ("spi: s3c64xx: consider the case when the CS line is not connected") Signed-off-by: Andi Shyti Reviewed-by: Krzysztof Kozlowski Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-s3c64xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c index 3c09e94cf827..186342b74141 100644 --- a/drivers/spi/spi-s3c64xx.c +++ b/drivers/spi/spi-s3c64xx.c @@ -1003,7 +1003,7 @@ static struct s3c64xx_spi_info *s3c64xx_spi_parse_dt(struct device *dev) sci->num_cs = temp; } - sci->no_cs = of_property_read_bool(dev->of_node, "broken-cs"); + sci->no_cs = of_property_read_bool(dev->of_node, "no-cs-readback"); return sci; } From 37e70c4de51bf80123292f175dfce6b19af9c5d8 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 25 Oct 2016 11:37:58 +0200 Subject: [PATCH 021/357] ARM: at91: define LPDDR types commit e3f0a4017c2143b4b813df6a93e8cf79e3f76936 upstream. The Atmel MPDDR controller support LPDDR2 and LPDDR3 memories, add their types. Signed-off-by: Alexandre Belloni Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- include/soc/at91/at91sam9_ddrsdr.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/soc/at91/at91sam9_ddrsdr.h b/include/soc/at91/at91sam9_ddrsdr.h index dc10c52e0e91..393362bdb860 100644 --- a/include/soc/at91/at91sam9_ddrsdr.h +++ b/include/soc/at91/at91sam9_ddrsdr.h @@ -81,6 +81,7 @@ #define AT91_DDRSDRC_LPCB_POWER_DOWN 2 #define AT91_DDRSDRC_LPCB_DEEP_POWER_DOWN 3 #define AT91_DDRSDRC_CLKFR (1 << 2) /* Clock Frozen */ +#define AT91_DDRSDRC_LPDDR2_PWOFF (1 << 3) /* LPDDR Power Off */ #define AT91_DDRSDRC_PASR (7 << 4) /* Partial Array Self Refresh */ #define AT91_DDRSDRC_TCSR (3 << 8) /* Temperature Compensated Self Refresh */ #define AT91_DDRSDRC_DS (3 << 10) /* Drive Strength */ @@ -96,7 +97,9 @@ #define AT91_DDRSDRC_MD_SDR 0 #define AT91_DDRSDRC_MD_LOW_POWER_SDR 1 #define AT91_DDRSDRC_MD_LOW_POWER_DDR 3 +#define AT91_DDRSDRC_MD_LPDDR3 5 #define AT91_DDRSDRC_MD_DDR2 6 /* [SAM9 Only] */ +#define AT91_DDRSDRC_MD_LPDDR2 7 #define AT91_DDRSDRC_DBW (1 << 4) /* Data Bus Width */ #define AT91_DDRSDRC_DBW_32BITS (0 << 4) #define AT91_DDRSDRC_DBW_16BITS (1 << 4) From f9a1949f8ff308d4320a44aa995c7f16ab40cf7f Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 19 Jan 2017 01:46:58 +0100 Subject: [PATCH 022/357] ARM: dts: at91: Enable DMA on sama5d4_xplained console commit ef8d02d4a2c36f7a93e74c95a9c419353b310117 upstream. Enable DMA on usart3 to get a more reliable console. This is especially useful for automation and kernelci were a kernel with PROVE_LOCKING enabled is quite susceptible to character loss, resulting in tests failure. Acked-by: Nicolas Ferre Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/at91-sama5d4_xplained.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/at91-sama5d4_xplained.dts b/arch/arm/boot/dts/at91-sama5d4_xplained.dts index ed7fce297738..44d1171c7fc0 100644 --- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts +++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts @@ -110,6 +110,8 @@ }; usart3: serial@fc00c000 { + atmel,use-dma-rx; + atmel,use-dma-tx; status = "okay"; }; From 9971863fdd623da5df091159cc889ea101b68202 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 19 Jan 2017 23:05:39 +0100 Subject: [PATCH 023/357] ARM: dts: at91: Enable DMA on sama5d2_xplained console commit 78162d48466d23c45a784034630c5928af631e3d upstream. Enable DMA on uart1 to get a more reliable console. Acked-by: Nicolas Ferre Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/at91-sama5d2_xplained.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/at91-sama5d2_xplained.dts b/arch/arm/boot/dts/at91-sama5d2_xplained.dts index 0b9a59d5fdac..30fac04289a5 100644 --- a/arch/arm/boot/dts/at91-sama5d2_xplained.dts +++ b/arch/arm/boot/dts/at91-sama5d2_xplained.dts @@ -148,6 +148,8 @@ uart1: serial@f8020000 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_uart1_default>; + atmel,use-dma-rx; + atmel,use-dma-tx; status = "okay"; }; From ebc3e95502aa9feebe1fd021b6d3c44dd27ecb48 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 16 Feb 2017 15:26:54 +0800 Subject: [PATCH 024/357] ALSA: hda/realtek - Cannot adjust speaker's volume on a Dell AIO commit 9f1bc2c4c58fcb2d86e0e26437dc8f3a18ac3276 upstream. The issue is the same as "dd9aa335c880 ALSA: hda/realtek - Can't adjust speaker's volume on a Dell AIO", the output requires to connect to a node with Amp-out capability. Applying the same fixup "ALC298_FIXUP_SPK_VOLUME" can fix the issue. Signed-off-by: Kai-Heng Feng Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 758ac86a1d3a..d4402b02b1bc 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6065,6 +6065,12 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { SND_HDA_PIN_QUIRK(0x10ec0298, 0x1028, "Dell", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, ALC298_STANDARD_PINS, {0x17, 0x90170150}), + SND_HDA_PIN_QUIRK(0x10ec0298, 0x1028, "Dell", ALC298_FIXUP_SPK_VOLUME, + {0x12, 0xb7a60140}, + {0x13, 0xb7a60150}, + {0x17, 0x90170110}, + {0x1a, 0x03011020}, + {0x21, 0x03211030}), {} }; From 51ce9867c2efd422519edb92e6d4ea5276b3f283 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Wed, 15 Feb 2017 17:09:42 +0100 Subject: [PATCH 025/357] ALSA: hda - fix Lewisburg audio issue commit e7480b34ad1ab84a63540b2c884cb92c0764ab74 upstream. Like for Sunrise Point, the total stream number of Lewisburg's input and output stream exceeds 15 (GCAP is 0x9701), which will cause some streams do not work because of the overflow on SDxCTL.STRM field if using the legacy stream tag allocation method. Fixes: 5cf92c8b3dc5 ("ALSA: hda - Add Intel Lewisburg device IDs Audio") Signed-off-by: Jaroslav Kysela Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index c64d986009a9..bc4462694aaf 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2197,9 +2197,9 @@ static const struct pci_device_id azx_ids[] = { .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, /* Lewisburg */ { PCI_DEVICE(0x8086, 0xa1f0), - .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, { PCI_DEVICE(0x8086, 0xa270), - .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, /* Lynx Point-LP */ { PCI_DEVICE(0x8086, 0x9c20), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, From 5ac9276dd15f6aeb55e8eb0ecf6e15759ae1f501 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 28 Feb 2017 14:49:07 +0100 Subject: [PATCH 026/357] ALSA: timer: Reject user params with too small ticks commit 71321eb3f2d0df4e6c327e0b936eec4458a12054 upstream. When a user sets a too small ticks with a fine-grained timer like hrtimer, the kernel tries to fire up the timer irq too frequently. This may lead to the condensed locks, eventually the kernel spinlock lockup with warnings. For avoiding such a situation, we define a lower limit of the resolution, namely 1ms. When the user passes a too small tick value that results in less than that, the kernel returns -EINVAL now. Reported-by: Dmitry Vyukov Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/timer.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index fc144f43faa6..ad153149b231 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1702,9 +1702,21 @@ static int snd_timer_user_params(struct file *file, return -EBADFD; if (copy_from_user(¶ms, _params, sizeof(params))) return -EFAULT; - if (!(t->hw.flags & SNDRV_TIMER_HW_SLAVE) && params.ticks < 1) { - err = -EINVAL; - goto _end; + if (!(t->hw.flags & SNDRV_TIMER_HW_SLAVE)) { + u64 resolution; + + if (params.ticks < 1) { + err = -EINVAL; + goto _end; + } + + /* Don't allow resolution less than 1ms */ + resolution = snd_timer_resolution(tu->timeri); + resolution *= params.ticks; + if (resolution < 1000000) { + err = -EINVAL; + goto _end; + } } if (params.queue_size > 0 && (params.queue_size < 32 || params.queue_size > 1024)) { From 074f6db61f962e302c83092ac2ee5692f9351454 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 28 Feb 2017 17:16:48 +0100 Subject: [PATCH 027/357] ALSA: ctxfi: Fallback DMA mask to 32bit commit 15c75b09f8d190f89ab4db463b87d411ca349dfe upstream. Currently ctxfi driver tries to set only the 64bit DMA mask on 64bit architectures, and bails out if it fails. This causes a problem on some platforms since the 64bit DMA isn't always guaranteed. We should fall back to the default 32bit DMA when 64bit DMA fails. Fixes: 6d74b86d3c0f ("ALSA: ctxfi - Allow 64bit DMA") Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/ctxfi/cthw20k1.c | 19 ++++++------------- sound/pci/ctxfi/cthw20k2.c | 19 ++++++------------- 2 files changed, 12 insertions(+), 26 deletions(-) diff --git a/sound/pci/ctxfi/cthw20k1.c b/sound/pci/ctxfi/cthw20k1.c index 9667cbfb0ca2..ab4cdab5cfa5 100644 --- a/sound/pci/ctxfi/cthw20k1.c +++ b/sound/pci/ctxfi/cthw20k1.c @@ -27,12 +27,6 @@ #include "cthw20k1.h" #include "ct20k1reg.h" -#if BITS_PER_LONG == 32 -#define CT_XFI_DMA_MASK DMA_BIT_MASK(32) /* 32 bit PTE */ -#else -#define CT_XFI_DMA_MASK DMA_BIT_MASK(64) /* 64 bit PTE */ -#endif - struct hw20k1 { struct hw hw; spinlock_t reg_20k1_lock; @@ -1904,19 +1898,18 @@ static int hw_card_start(struct hw *hw) { int err; struct pci_dev *pci = hw->pci; + const unsigned int dma_bits = BITS_PER_LONG; err = pci_enable_device(pci); if (err < 0) return err; /* Set DMA transfer mask */ - if (dma_set_mask(&pci->dev, CT_XFI_DMA_MASK) < 0 || - dma_set_coherent_mask(&pci->dev, CT_XFI_DMA_MASK) < 0) { - dev_err(hw->card->dev, - "architecture does not support PCI busmaster DMA with mask 0x%llx\n", - CT_XFI_DMA_MASK); - err = -ENXIO; - goto error1; + if (dma_set_mask(&pci->dev, DMA_BIT_MASK(dma_bits))) { + dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(dma_bits)); + } else { + dma_set_mask(&pci->dev, DMA_BIT_MASK(32)); + dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(32)); } if (!hw->io_base) { diff --git a/sound/pci/ctxfi/cthw20k2.c b/sound/pci/ctxfi/cthw20k2.c index 6414ecf93efa..18ee7768b7c4 100644 --- a/sound/pci/ctxfi/cthw20k2.c +++ b/sound/pci/ctxfi/cthw20k2.c @@ -26,12 +26,6 @@ #include "cthw20k2.h" #include "ct20k2reg.h" -#if BITS_PER_LONG == 32 -#define CT_XFI_DMA_MASK DMA_BIT_MASK(32) /* 32 bit PTE */ -#else -#define CT_XFI_DMA_MASK DMA_BIT_MASK(64) /* 64 bit PTE */ -#endif - struct hw20k2 { struct hw hw; /* for i2c */ @@ -2029,19 +2023,18 @@ static int hw_card_start(struct hw *hw) int err = 0; struct pci_dev *pci = hw->pci; unsigned int gctl; + const unsigned int dma_bits = BITS_PER_LONG; err = pci_enable_device(pci); if (err < 0) return err; /* Set DMA transfer mask */ - if (dma_set_mask(&pci->dev, CT_XFI_DMA_MASK) < 0 || - dma_set_coherent_mask(&pci->dev, CT_XFI_DMA_MASK) < 0) { - dev_err(hw->card->dev, - "architecture does not support PCI busmaster DMA with mask 0x%llx\n", - CT_XFI_DMA_MASK); - err = -ENXIO; - goto error1; + if (!dma_set_mask(&pci->dev, DMA_BIT_MASK(dma_bits))) { + dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(dma_bits)); + } else { + dma_set_mask(&pci->dev, DMA_BIT_MASK(32)); + dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(32)); } if (!hw->io_base) { From 09cd5d3479b930f5cde8ed4d66784e724ffcf08c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 28 Feb 2017 22:15:51 +0100 Subject: [PATCH 028/357] ALSA: seq: Fix link corruption by event error handling commit f3ac9f737603da80c2da3e84b89e74429836bb6d upstream. The sequencer FIFO management has a bug that may lead to a corruption (shortage) of the cell linked list. When a sequencer client faces an error at the event delivery, it tries to put back the dequeued cell. When the first queue was put back, this forgot the tail pointer tracking, and the link will be screwed up. Although there is no memory corruption, the sequencer client may stall forever at exit while flushing the pending FIFO cells in snd_seq_pool_done(), as spotted by syzkaller. This patch addresses the missing tail pointer tracking at snd_seq_fifo_cell_putback(). Also the patch makes sure to clear the cell->enxt pointer at snd_seq_fifo_event_in() for avoiding a similar mess-up of the FIFO linked list. Reported-by: Dmitry Vyukov Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/seq_fifo.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c index 1d5acbe0c08b..86240d02b530 100644 --- a/sound/core/seq/seq_fifo.c +++ b/sound/core/seq/seq_fifo.c @@ -135,6 +135,7 @@ int snd_seq_fifo_event_in(struct snd_seq_fifo *f, f->tail = cell; if (f->head == NULL) f->head = cell; + cell->next = NULL; f->cells++; spin_unlock_irqrestore(&f->lock, flags); @@ -214,6 +215,8 @@ void snd_seq_fifo_cell_putback(struct snd_seq_fifo *f, spin_lock_irqsave(&f->lock, flags); cell->next = f->head; f->head = cell; + if (!f->tail) + f->tail = cell; f->cells++; spin_unlock_irqrestore(&f->lock, flags); } From 2abe620e01c9a39c3b76dd419f6c0e58fae4b885 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 28 Feb 2017 17:27:57 +0100 Subject: [PATCH 029/357] ALSA: hda - Add subwoofer support for Dell Inspiron 17 7000 Gaming commit 493de342748cc6f52938096f5480cf291da58a0b upstream. Dell Inspiron 17 7000 Gaming laptop needs a similar quirk like Inspiron 7599 to support its subwoofer speaker. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=194191 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d4402b02b1bc..f026fc94b227 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5577,6 +5577,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE), SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME), + SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), From acb06ff2d5d559c7a934fdb7f841cfc19eb4e4bd Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Mon, 27 Feb 2017 10:11:47 +0800 Subject: [PATCH 030/357] ALSA: hda - Fix micmute hotkey problem for a lenovo AIO machine commit 29693efcea0f38cf40d0055d2401490a4f9bf8be upstream. On this machine, the micmute button is connected to Line2 of the codec and the micmute led is connected to GPIO2 of the codec. After applying this quirk, both hotkey and led work well. Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f026fc94b227..0c62b1d8c11b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5693,6 +5693,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC292_FIXUP_TPT460), SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), + SND_PCI_QUIRK(0x17aa, 0x3112, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x3978, "IdeaPad Y410P", ALC269_FIXUP_NO_SHUTUP), From 4401e4779e2e6edea43f80ba1dc8e25e9bec7abd Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 8 Feb 2017 14:07:42 -0800 Subject: [PATCH 031/357] hwmon: (it87) Do not overwrite bit 2..6 of pwm control registers commit 4c7b8ca1ae5ed9e27014732c8a918ba11a86cf09 upstream. In IT8620E, after setting pwm control to manual, it was observed that pwm values for fan 4..6 have reversed results (writing 0 results in fans running at full speed, writing 255 results in fans turned off). With the new PWM control, pwm polarity for pwm control 4..6 is specified in its pwm control registers. Those registers are overwritten when setting the pwm mode or the temperature mapping. Do not touch bit 2..6 of pwm control registers on register writes to fix the problem. Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/it87.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c index ad82cb28d87a..991e0fce781e 100644 --- a/drivers/hwmon/it87.c +++ b/drivers/hwmon/it87.c @@ -1300,25 +1300,35 @@ static ssize_t set_pwm_enable(struct device *dev, struct device_attribute *attr, it87_write_value(data, IT87_REG_FAN_MAIN_CTRL, data->fan_main_ctrl); } else { + u8 ctrl; + /* No on/off mode, set maximum pwm value */ data->pwm_duty[nr] = pwm_to_reg(data, 0xff); it87_write_value(data, IT87_REG_PWM_DUTY[nr], data->pwm_duty[nr]); /* and set manual mode */ - data->pwm_ctrl[nr] = has_newer_autopwm(data) ? - data->pwm_temp_map[nr] : - data->pwm_duty[nr]; - it87_write_value(data, IT87_REG_PWM[nr], - data->pwm_ctrl[nr]); + if (has_newer_autopwm(data)) { + ctrl = (data->pwm_ctrl[nr] & 0x7c) | + data->pwm_temp_map[nr]; + } else { + ctrl = data->pwm_duty[nr]; + } + data->pwm_ctrl[nr] = ctrl; + it87_write_value(data, IT87_REG_PWM[nr], ctrl); } } else { - if (val == 1) /* Manual mode */ - data->pwm_ctrl[nr] = has_newer_autopwm(data) ? - data->pwm_temp_map[nr] : - data->pwm_duty[nr]; - else /* Automatic mode */ - data->pwm_ctrl[nr] = 0x80 | data->pwm_temp_map[nr]; - it87_write_value(data, IT87_REG_PWM[nr], data->pwm_ctrl[nr]); + u8 ctrl; + + if (has_newer_autopwm(data)) { + ctrl = (data->pwm_ctrl[nr] & 0x7c) | + data->pwm_temp_map[nr]; + if (val != 1) + ctrl |= 0x80; + } else { + ctrl = (val == 1 ? data->pwm_duty[nr] : 0x80); + } + data->pwm_ctrl[nr] = ctrl; + it87_write_value(data, IT87_REG_PWM[nr], ctrl); if (data->type != it8603 && nr < 3) { /* set SmartGuardian mode */ @@ -1462,7 +1472,8 @@ static ssize_t set_pwm_temp_map(struct device *dev, * otherwise, just store it for later use. */ if (data->pwm_ctrl[nr] & 0x80) { - data->pwm_ctrl[nr] = 0x80 | data->pwm_temp_map[nr]; + data->pwm_ctrl[nr] = (data->pwm_ctrl[nr] & 0xfc) | + data->pwm_temp_map[nr]; it87_write_value(data, IT87_REG_PWM[nr], data->pwm_ctrl[nr]); } mutex_unlock(&data->update_lock); From 6c95eba9ca104fad6abd758c58b3ac55c44f627d Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 8 Feb 2017 14:02:59 -0800 Subject: [PATCH 032/357] hwmon: (it87) Ensure that pwm control cache is current before updating values commit 82dbe987b70042b340f851bdc969a971081e5f02 upstream. If sensor attributes were never read, the pwm control data has not been initiialized, which can cause wrong driver behavior. Ensure that cached data is current before acting on it. Reported-by: Kevin Folz Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/it87.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c index 991e0fce781e..43146162c122 100644 --- a/drivers/hwmon/it87.c +++ b/drivers/hwmon/it87.c @@ -1354,6 +1354,7 @@ static ssize_t set_pwm(struct device *dev, struct device_attribute *attr, return -EINVAL; mutex_lock(&data->update_lock); + it87_update_pwm_ctrl(data, nr); if (has_newer_autopwm(data)) { /* * If we are in automatic mode, the PWM duty cycle register @@ -1466,6 +1467,7 @@ static ssize_t set_pwm_temp_map(struct device *dev, } mutex_lock(&data->update_lock); + it87_update_pwm_ctrl(data, nr); data->pwm_temp_map[nr] = reg; /* * If we are in automatic mode, write the temp mapping immediately; From 2893a55e39fcfe3b9f5738584db98848b4957d9f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 26 Jan 2017 12:37:32 +0100 Subject: [PATCH 033/357] staging: greybus: loopback: fix broken udelay commit 33b8807a6fe10d0e675e0704444373a6fad93188 upstream. The loopback driver allows the user to set a minimum delay of up to one second to be inserted between test iterations (i.e. request submissions). The delay is currently specified in microseconds and is implemented using udelay. Busy looping for long periods is not just anti-social; udelay must not be used for delays longer than a few milliseconds due to the risk of integer overflow. Replace the broken udelay with a usleep_range with a 100 us range for short delays (< 20 ms) and otherwise revert to using msleep. Fixes: b36f04fa9417 ("greybus: loopback: Convert thread delay to microseconds") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/loopback.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/staging/greybus/loopback.c b/drivers/staging/greybus/loopback.c index 7882306adeca..29dc249b0c74 100644 --- a/drivers/staging/greybus/loopback.c +++ b/drivers/staging/greybus/loopback.c @@ -1051,8 +1051,13 @@ static int gb_loopback_fn(void *data) gb_loopback_calculate_stats(gb, !!error); } gb->send_count++; - if (us_wait) - udelay(us_wait); + + if (us_wait) { + if (us_wait < 20000) + usleep_range(us_wait, us_wait + 100); + else + msleep(us_wait / 1000); + } } gb_pm_runtime_put_autosuspend(bundle); From 148c4526d76e1af4065fac629bff3fcc5019de1c Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Sun, 19 Feb 2017 16:35:59 -0500 Subject: [PATCH 034/357] staging/lustre/lnet: Fix allocation size for sv_cpt_data commit dc7ffefdcc28a45214aa707fdc3df6a5e611ba09 upstream. This is unbreaking another of those "stealth" janitor patches that got in and subtly broke some things. sv_cpt_data is a pointer to pointer, so need to dereference it twice to allocate the correct structure size. Fixes: 9899cb68c6c2 ("Staging: lustre: rpc: Use sizeof type *pointer instead of sizeof type.") CC: Sandhya Bankar Signed-off-by: Oleg Drokin Reviewed-by: James Simmons Reviewed-by: Doug Oucharek Signed-off-by: Greg Kroah-Hartman --- drivers/staging/lustre/lnet/selftest/rpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/lustre/lnet/selftest/rpc.c b/drivers/staging/lustre/lnet/selftest/rpc.c index f5619d8744ef..0256d65dfcd8 100644 --- a/drivers/staging/lustre/lnet/selftest/rpc.c +++ b/drivers/staging/lustre/lnet/selftest/rpc.c @@ -252,7 +252,7 @@ srpc_service_init(struct srpc_service *svc) svc->sv_shuttingdown = 0; svc->sv_cpt_data = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(*svc->sv_cpt_data)); + sizeof(**svc->sv_cpt_data)); if (!svc->sv_cpt_data) return -ENOMEM; From c7472b964d9a1bfabe4953175cdbd5ccf44bc416 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Jan 2017 15:53:08 +0100 Subject: [PATCH 035/357] staging: rtl: fix possible NULL pointer dereference commit 6e017006022abfea5d2466cad936065f45763ad1 upstream. gcc-7 detects that wlanhdr_to_ethhdr() in two drivers calls memcpy() with a destination argument that an earlier function call may have set to NULL: staging/rtl8188eu/core/rtw_recv.c: In function 'wlanhdr_to_ethhdr': staging/rtl8188eu/core/rtw_recv.c:1318:2: warning: argument 1 null where non-null expected [-Wnonnull] staging/rtl8712/rtl871x_recv.c: In function 'r8712_wlanhdr_to_ethhdr': staging/rtl8712/rtl871x_recv.c:649:2: warning: argument 1 null where non-null expected [-Wnonnull] I'm fixing this by adding a NULL pointer check and returning failure from the function, which is hopefully already handled properly. This seems to date back to when the drivers were originally added, so backporting the fix to stable seems appropriate. There are other related realtek drivers in the kernel, but none of them contain a function with a similar name or produce this warning. Fixes: 1cc18a22b96b ("staging: r8188eu: Add files for new driver - part 5") Fixes: 2865d42c78a9 ("staging: r8712u: Add the new driver to the mainline kernel") Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/core/rtw_recv.c | 3 +++ drivers/staging/rtl8712/rtl871x_recv.c | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/staging/rtl8188eu/core/rtw_recv.c b/drivers/staging/rtl8188eu/core/rtw_recv.c index b87cbbbee054..b39fd1e9b4a0 100644 --- a/drivers/staging/rtl8188eu/core/rtw_recv.c +++ b/drivers/staging/rtl8188eu/core/rtw_recv.c @@ -1383,6 +1383,9 @@ static int wlanhdr_to_ethhdr(struct recv_frame *precvframe) ptr = recvframe_pull(precvframe, (rmv_len-sizeof(struct ethhdr) + (bsnaphdr ? 2 : 0))); } + if (!ptr) + return _FAIL; + memcpy(ptr, pattrib->dst, ETH_ALEN); memcpy(ptr+ETH_ALEN, pattrib->src, ETH_ALEN); diff --git a/drivers/staging/rtl8712/rtl871x_recv.c b/drivers/staging/rtl8712/rtl871x_recv.c index cbd2e51ba42b..cedf25b0b093 100644 --- a/drivers/staging/rtl8712/rtl871x_recv.c +++ b/drivers/staging/rtl8712/rtl871x_recv.c @@ -643,11 +643,16 @@ sint r8712_wlanhdr_to_ethhdr(union recv_frame *precvframe) /* append rx status for mp test packets */ ptr = recvframe_pull(precvframe, (rmv_len - sizeof(struct ethhdr) + 2) - 24); + if (!ptr) + return _FAIL; memcpy(ptr, get_rxmem(precvframe), 24); ptr += 24; - } else + } else { ptr = recvframe_pull(precvframe, (rmv_len - sizeof(struct ethhdr) + (bsnaphdr ? 2 : 0))); + if (!ptr) + return _FAIL; + } memcpy(ptr, pattrib->dst, ETH_ALEN); memcpy(ptr + ETH_ALEN, pattrib->src, ETH_ALEN); From 59cd503c8cfd3f69f8126165b274860bbbd058db Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 16 Jan 2017 18:00:00 +0000 Subject: [PATCH 036/357] coresight: STM: Balance enable/disable commit 4474f4c40a9c607c7317e686b23619b7b768004f upstream. The stm is automatically enabled when an application sets the policy via ->link() call back by using coresight_enable(), which keeps the refcount of the current users of the STM. However, the unlink() callback issues stm_disable() directly, which leaves the STM turned off, without the coresight layer knowing about it. This prevents any further uses of the STM hardware as the coresight layer still thinks the STM is turned on and doesn't enable the hardware when required. Even manually enabling the STM via sysfs can't really enable the hw. e.g, $ echo 1 > $CS_DEVS/$ETR/enable_sink $ mkdir -p $CONFIG_FS/stp-policy/$source.0/stm_test/ $ echo 32768 65535 > $CONFIG_FS/stp-policy/$source.0/stm_test/channels $ echo 64 > $CS_DEVS/$source/traceid $ ./stm_app Sending 64000 byte blocks of pattern 0 at 0us intervals Success to map channel(32768~32783) to 0xffffa95fa000 Sending on channel 32768 $ dd if=/dev/$ETR of=~/trace.bin.1 597+1 records in 597+1 records out 305920 bytes (306 kB) copied, 0.399952 s, 765 kB/s $ ./stm_app Sending 64000 byte blocks of pattern 0 at 0us intervals Success to map channel(32768~32783) to 0xffff7e9e2000 Sending on channel 32768 $ dd if=/dev/$ETR of=~/trace.bin.2 0+0 records in 0+0 records out 0 bytes (0 B) copied, 0.0232083 s, 0.0 kB/s Note that we don't get any data from the ETR for the second session. Also dmesg shows : [ 77.520458] coresight-tmc 20800000.etr: TMC-ETR enabled [ 77.537097] coresight-replicator etr_replicator@20890000: REPLICATOR enabled [ 77.558828] coresight-replicator main_replicator@208a0000: REPLICATOR enabled [ 77.581068] coresight-funnel 208c0000.main_funnel: FUNNEL inport 0 enabled [ 77.602217] coresight-tmc 20840000.etf: TMC-ETF enabled [ 77.618422] coresight-stm 20860000.stm: STM tracing enabled [ 139.554252] coresight-stm 20860000.stm: STM tracing disabled # End of first tracing session [ 146.351135] coresight-tmc 20800000.etr: TMC read start [ 146.514486] coresight-tmc 20800000.etr: TMC read end # Note that the STM is not turned on via stm_generic_link()->coresight_enable() # and hence none of the components are turned on. [ 152.479080] coresight-tmc 20800000.etr: TMC read start [ 152.542632] coresight-tmc 20800000.etr: TMC read end This patch fixes the problem by balancing the unlink operation by using the coresight_disable(), keeping the coresight layer in sync with the hardware state and thus allowing normal usage of the STM component. Fixes: commit 237483aa5cf43 ("coresight: stm: adding driver for CoreSight STM component") Cc: Pratik Patel Cc: Greg Kroah-Hartman Acked-by: Mathieu Poirier Reviewed-by: Chunyan Zhang Reported-by: Robert Walker Signed-off-by: Suzuki K Poulose Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-stm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c index 49e0f1b925a5..8e7905632d25 100644 --- a/drivers/hwtracing/coresight/coresight-stm.c +++ b/drivers/hwtracing/coresight/coresight-stm.c @@ -356,7 +356,7 @@ static void stm_generic_unlink(struct stm_data *stm_data, if (!drvdata || !drvdata->csdev) return; - stm_disable(drvdata->csdev, NULL); + coresight_disable(drvdata->csdev); } static phys_addr_t From 2f714ba1282aa6f5f0158329a1b2d3858698618c Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Tue, 14 Feb 2017 17:31:03 +0200 Subject: [PATCH 037/357] regulator: Fix regulator_summary for deviceless consumers commit e42a46b6f52473661ad192f76a128a68fe301df4 upstream. It is allowed to call regulator_get with a NULL dev argument (_regulator_get explicitly checks for it) but this causes an error later when printing /sys/kernel/debug/regulator_summary. Fix this by explicitly handling "deviceless" consumers in the debugfs code. Signed-off-by: Leonard Crestez Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 5c1519b229e0..9faccfceb53c 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -4357,12 +4357,13 @@ static void regulator_summary_show_subtree(struct seq_file *s, seq_puts(s, "\n"); list_for_each_entry(consumer, &rdev->consumer_list, list) { - if (consumer->dev->class == ®ulator_class) + if (consumer->dev && consumer->dev->class == ®ulator_class) continue; seq_printf(s, "%*s%-*s ", (level + 1) * 3 + 1, "", - 30 - (level + 1) * 3, dev_name(consumer->dev)); + 30 - (level + 1) * 3, + consumer->dev ? dev_name(consumer->dev) : "deviceless"); switch (rdev->desc->type) { case REGULATOR_VOLTAGE: From 84c2697c9cd3c4ed05945d80fdfd4b7e906dd074 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 7 Feb 2017 15:51:47 +0000 Subject: [PATCH 038/357] tpm_tis: fix the error handling of init_tis() commit 5939eaf4f9d432586dd2cdeea778506471e8088e upstream. Add the missing platform_driver_unregister() and remove the duplicate platform_device_unregister(force_pdev) in the error handling case. Fixes: 00194826e6be ("tpm_tis: Clean up the force=1 module parameter") Signed-off-by: Wei Yongjun Reviewed-by: Jason Gunthorpe Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_tis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index eaf5730d79eb..8022bea27fed 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -421,7 +421,7 @@ err_pnp: acpi_bus_unregister_driver(&tis_acpi_driver); err_acpi: #endif - platform_device_unregister(force_pdev); + platform_driver_unregister(&tis_drv); err_platform: if (force_pdev) platform_device_unregister(force_pdev); From 61cb3c6357fd77a2b1af49a9015955b9f82c68d4 Mon Sep 17 00:00:00 2001 From: CQ Tang Date: Mon, 30 Jan 2017 09:39:52 -0800 Subject: [PATCH 039/357] iommu/vt-d: Fix some macros that are incorrectly specified in intel-iommu commit aaa59306b0b7e0ca4ba92cc04c5db101cbb1c096 upstream. Some of the macros are incorrect with wrong bit-shifts resulting in picking the incorrect invalidation granularity. Incorrect Source-ID in extended devtlb invalidation caused device side errors. To: Joerg Roedel To: David Woodhouse Cc: iommu@lists.linux-foundation.org Cc: linux-kernel@vger.kernel.org Cc: CQ Tang Cc: Ashok Raj Fixes: 2f26e0a9 ("iommu/vt-d: Add basic SVM PASID support") Signed-off-by: CQ Tang Signed-off-by: Ashok Raj Tested-by: CQ Tang Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- include/linux/intel-iommu.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index d49e26c6cdc7..23e129ef6726 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -153,8 +153,8 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60) #define DMA_TLB_DSI_FLUSH (((u64)2) << 60) #define DMA_TLB_PSI_FLUSH (((u64)3) << 60) -#define DMA_TLB_IIRG(type) ((type >> 60) & 7) -#define DMA_TLB_IAIG(val) (((val) >> 57) & 7) +#define DMA_TLB_IIRG(type) ((type >> 60) & 3) +#define DMA_TLB_IAIG(val) (((val) >> 57) & 3) #define DMA_TLB_READ_DRAIN (((u64)1) << 49) #define DMA_TLB_WRITE_DRAIN (((u64)1) << 48) #define DMA_TLB_DID(id) (((u64)((id) & 0xffff)) << 32) @@ -164,9 +164,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) /* INVALID_DESC */ #define DMA_CCMD_INVL_GRANU_OFFSET 61 -#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 3) -#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 3) -#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 3) +#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 4) +#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 4) +#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 4) #define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7) #define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6) #define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16))) @@ -316,8 +316,8 @@ enum { #define QI_DEV_EIOTLB_SIZE (((u64)1) << 11) #define QI_DEV_EIOTLB_GLOB(g) ((u64)g) #define QI_DEV_EIOTLB_PASID(p) (((u64)p) << 32) -#define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32) -#define QI_DEV_EIOTLB_QDEP(qd) (((qd) & 0x1f) << 16) +#define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 16) +#define QI_DEV_EIOTLB_QDEP(qd) ((u64)((qd) & 0x1f) << 4) #define QI_DEV_EIOTLB_MAX_INVS 32 #define QI_PGRP_IDX(idx) (((u64)(idx)) << 55) From 24427cd71d2fbef3eeb087e802ff327c18c7275f Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Mon, 30 Jan 2017 09:39:53 -0800 Subject: [PATCH 040/357] iommu/vt-d: Tylersburg isoch identity map check is done too late. commit 21e722c4c8377b5bc82ad058fed12165af739c1b upstream. The check to set identity map for tylersburg is done too late. It needs to be done before the check for identity_map domain is done. To: Joerg Roedel To: David Woodhouse Cc: iommu@lists.linux-foundation.org Cc: linux-kernel@vger.kernel.org Cc: Ashok Raj Fixes: 86080ccc22 ("iommu/vt-d: Allocate si_domain in init_dmars()") Signed-off-by: Ashok Raj Reported-by: Yunhong Jiang Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel-iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index d82637ab09fd..34be95ee9038 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3325,13 +3325,14 @@ static int __init init_dmars(void) iommu_identity_mapping |= IDENTMAP_GFX; #endif + check_tylersburg_isoch(); + if (iommu_identity_mapping) { ret = si_domain_init(hw_pass_through); if (ret) goto free_iommu; } - check_tylersburg_isoch(); /* * If we copied translations from a previous kernel in the kdump From c06d74df4ebb9ab3fedcf6d623816944edd206f5 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 19 Jan 2017 13:53:15 -0800 Subject: [PATCH 041/357] CIFS: Fix splice read for non-cached files commit 9c25702cee1405099f982894c865c163de7909a8 upstream. Currently we call copy_page_to_iter() for uncached reading into a pipe. This is wrong because it treats pages as VFS cache pages and copies references rather than actual data. When we are trying to read from the pipe we end up calling page_cache_pipe_buf_confirm() which returns -ENODATA. This error is translated into 0 which is returned to a user. This issue is reproduced by running xfs-tests suite (generic test #249) against mount points with "cache=none". Fix it by mapping pages manually and calling copy_to_iter() that copies data into the pipe. Signed-off-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman --- fs/cifs/file.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 18a1e1d6671f..1cd0e2eefc66 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2884,7 +2884,15 @@ cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter) for (i = 0; i < rdata->nr_pages; i++) { struct page *page = rdata->pages[i]; size_t copy = min_t(size_t, remaining, PAGE_SIZE); - size_t written = copy_page_to_iter(page, 0, copy, iter); + size_t written; + + if (unlikely(iter->type & ITER_PIPE)) { + void *addr = kmap_atomic(page); + + written = copy_to_iter(addr, copy, iter); + kunmap_atomic(addr); + } else + written = copy_page_to_iter(page, 0, copy, iter); remaining -= written; if (written < copy && iov_iter_count(iter) > 0) break; From f1faaec4843a6bfcb795887f5b1a280fb874789d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 24 Feb 2017 14:55:45 -0800 Subject: [PATCH 042/357] mm, devm_memremap_pages: hold device_hotplug lock over mem_hotplug_{begin, done} commit b5d24fda9c3dce51fcb4eee459550a458eaaf1e2 upstream. The mem_hotplug_{begin,done} lock coordinates with {get,put}_online_mems() to hold off "readers" of the current state of memory from new hotplug actions. mem_hotplug_begin() expects exclusive access, via the device_hotplug lock, to set mem_hotplug.active_writer. Calling mem_hotplug_begin() without locking device_hotplug can lead to corrupting mem_hotplug.refcount and missed wakeups / soft lockups. [dan.j.williams@intel.com: v2] Link: http://lkml.kernel.org/r/148728203365.38457.17804568297887708345.stgit@dwillia2-desk3.amr.corp.intel.com Link: http://lkml.kernel.org/r/148693885680.16345.17802627926777862337.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: f931ab479dd2 ("mm: fix devm_memremap_pages crash, use mem_hotplug_{begin, done}") Signed-off-by: Dan Williams Reported-by: Ben Hutchings Cc: Michal Hocko Cc: Toshi Kani Cc: Vlastimil Babka Cc: Logan Gunthorpe Cc: Masayoshi Mizuma Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/memremap.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/memremap.c b/kernel/memremap.c index 9ecedc28b928..06123234f118 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -246,9 +246,13 @@ static void devm_memremap_pages_release(struct device *dev, void *data) /* pages are dead and unused, undo the arch mapping */ align_start = res->start & ~(SECTION_SIZE - 1); align_size = ALIGN(resource_size(res), SECTION_SIZE); + + lock_device_hotplug(); mem_hotplug_begin(); arch_remove_memory(align_start, align_size); mem_hotplug_done(); + unlock_device_hotplug(); + untrack_pfn(NULL, PHYS_PFN(align_start), align_size); pgmap_radix_release(res); dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc, @@ -360,9 +364,11 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (error) goto err_pfn_remap; + lock_device_hotplug(); mem_hotplug_begin(); error = arch_add_memory(nid, align_start, align_size, true); mem_hotplug_done(); + unlock_device_hotplug(); if (error) goto err_add_memory; From d1e8042628a31e28c8e92411ab83a28bd5e3f7c9 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 24 Feb 2017 14:59:33 -0800 Subject: [PATCH 043/357] mm/page_alloc: fix nodes for reclaim in fast path commit e02dc017c3032dcdce1b993af0db135462e1b4b7 upstream. When @node_reclaim_node isn't 0, the page allocator tries to reclaim pages if the amount of free memory in the zones are below the low watermark. On Power platform, none of NUMA nodes are scanned for page reclaim because no nodes match the condition in zone_allows_reclaim(). On Power platform, RECLAIM_DISTANCE is set to 10 which is the distance of Node-A to Node-A. So the preferred node even won't be scanned for page reclaim. __alloc_pages_nodemask() get_page_from_freelist() zone_allows_reclaim() Anton proposed the test code as below: # cat alloc.c : int main(int argc, char *argv[]) { void *p; unsigned long size; unsigned long start, end; start = time(NULL); size = strtoul(argv[1], NULL, 0); printf("To allocate %ldGB memory\n", size); size <<= 30; p = malloc(size); assert(p); memset(p, 0, size); end = time(NULL); printf("Used time: %ld seconds\n", end - start); sleep(3600); return 0; } The system I use for testing has two NUMA nodes. Both have 128GB memory. In below scnario, the page caches on node#0 should be reclaimed when it encounters pressure to accommodate request of allocation. # echo 2 > /proc/sys/vm/zone_reclaim_mode; \ sync; \ echo 3 > /proc/sys/vm/drop_caches; \ # taskset -c 0 cat file.32G > /dev/null; \ grep FilePages /sys/devices/system/node/node0/meminfo Node 0 FilePages: 33619712 kB # taskset -c 0 ./alloc 128 # grep FilePages /sys/devices/system/node/node0/meminfo Node 0 FilePages: 33619840 kB # grep MemFree /sys/devices/system/node/node0/meminfo Node 0 MemFree: 186816 kB With the patch applied, the pagecache on node-0 is reclaimed when its free memory is running out. It's the expected behaviour. # echo 2 > /proc/sys/vm/zone_reclaim_mode; \ sync; \ echo 3 > /proc/sys/vm/drop_caches # taskset -c 0 cat file.32G > /dev/null; \ grep FilePages /sys/devices/system/node/node0/meminfo Node 0 FilePages: 33605568 kB # taskset -c 0 ./alloc 128 # grep FilePages /sys/devices/system/node/node0/meminfo Node 0 FilePages: 1379520 kB # grep MemFree /sys/devices/system/node/node0/meminfo Node 0 MemFree: 317120 kB Fixes: 5f7a75acdb24 ("mm: page_alloc: do not cache reclaim distances") Link: http://lkml.kernel.org/r/1486532455-29613-1-git-send-email-gwshan@linux.vnet.ibm.com Signed-off-by: Gavin Shan Acked-by: Mel Gorman Acked-by: Michal Hocko Cc: Anton Blanchard Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f4a02e240fb6..1460e6ad5e14 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2858,7 +2858,7 @@ bool zone_watermark_ok_safe(struct zone *z, unsigned int order, #ifdef CONFIG_NUMA static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) { - return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) < + return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <= RECLAIM_DISTANCE; } #else /* CONFIG_NUMA */ From 58d1dbb904ba9ce1f8ac99c5d210ad7bb14eed08 Mon Sep 17 00:00:00 2001 From: Vinayak Menon Date: Fri, 24 Feb 2017 14:59:39 -0800 Subject: [PATCH 044/357] mm: vmpressure: fix sending wrong events on underflow commit e1587a4945408faa58d0485002c110eb2454740c upstream. At the end of a window period, if the reclaimed pages is greater than scanned, an unsigned underflow can result in a huge pressure value and thus a critical event. Reclaimed pages is found to go higher than scanned because of the addition of reclaimed slab pages to reclaimed in shrink_node without a corresponding increment to scanned pages. Minchan Kim mentioned that this can also happen in the case of a THP page where the scanned is 1 and reclaimed could be 512. Link: http://lkml.kernel.org/r/1486641577-11685-1-git-send-email-vinmenon@codeaurora.org Signed-off-by: Vinayak Menon Acked-by: Minchan Kim Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Mel Gorman Cc: Vlastimil Babka Cc: Rik van Riel Cc: Vladimir Davydov Cc: Anton Vorontsov Cc: Shiraz Hashim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/vmpressure.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/mm/vmpressure.c b/mm/vmpressure.c index 149fdf6c5c56..6063581f705c 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c @@ -112,8 +112,15 @@ static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned, unsigned long reclaimed) { unsigned long scale = scanned + reclaimed; - unsigned long pressure; + unsigned long pressure = 0; + /* + * reclaimed can be greater than scanned in cases + * like THP, where the scanned is 1 and reclaimed + * could be 512 + */ + if (reclaimed >= scanned) + goto out; /* * We calculate the ratio (in percents) of how many pages were * scanned vs. reclaimed in a given time frame (window). Note that @@ -124,6 +131,7 @@ static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned, pressure = scale - (reclaimed * scale / scanned); pressure = pressure * 100 / scale; +out: pr_debug("%s: %3lu (s: %lu r: %lu)\n", __func__, pressure, scanned, reclaimed); From 2c290eede9b6375ad15025aa85a7c07c3ce1a3f3 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Fri, 24 Feb 2017 14:59:59 -0800 Subject: [PATCH 045/357] mm: do not access page->mapping directly on page_endio commit dd8416c47715cf324c9a16f13273f9fda87acfed upstream. With rw_page, page_endio is used for completing IO on a page and it propagates write error to the address space if the IO fails. The problem is it accesses page->mapping directly which might be okay for file-backed pages but it shouldn't for anonymous page. Otherwise, it can corrupt one of field from anon_vma under us and system goes panic randomly. swap_writepage bdev_writepage ops->rw_page I encountered the BUG during developing new zram feature and it was really hard to figure it out because it made random crash, somtime mmap_sem lockdep, sometime other places where places never related to zram/zsmalloc, and not reproducible with some configuration. When I consider how that bug is subtle and people do fast-swap test with brd, it's worth to add stable mark, I think. Fixes: dd6bd0d9c7db ("swap: use bdev_read_page() / bdev_write_page()") Signed-off-by: Minchan Kim Acked-by: Michal Hocko Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/filemap.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index d8d7df82c69a..edfb90e3830c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -910,9 +910,12 @@ void page_endio(struct page *page, bool is_write, int err) unlock_page(page); } else { if (err) { + struct address_space *mapping; + SetPageError(page); - if (page->mapping) - mapping_set_error(page->mapping, err); + mapping = page_mapping(page); + if (mapping) + mapping_set_error(mapping, err); } end_page_writeback(page); } From 8f6620e391a43d5da24fa09f98d9b51016545470 Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Fri, 24 Feb 2017 15:00:40 -0800 Subject: [PATCH 046/357] mm balloon: umount balloon_mnt when removing vb device commit 9c57b5808c625f4fc93da330b932647eaff321f7 upstream. With CONFIG_BALLOON_COMPACTION=y the kernel will mount balloon_mnt for balloon page migration when we probe a virtio_balloon device. However we do not unmount it when removing the device. Fix this. Fixes: b1123ea6d3b3 ("mm: balloon: use general non-lru movable page feature") Link: http://lkml.kernel.org/r/1486531318-35189-1-git-send-email-xieyisheng1@huawei.com Signed-off-by: Yisheng Xie Acked-by: Minchan Kim Cc: Rafael Aquini Cc: Konstantin Khlebnikov Cc: Gioh Kim Cc: Vlastimil Babka Cc: Michal Hocko Cc: Michael S. Tsirkin Cc: Jason Wang Cc: Hanjun Guo Cc: Xishi Qiu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio_balloon.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 181793f07852..9d2738e9217f 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -615,8 +615,12 @@ static void virtballoon_remove(struct virtio_device *vdev) cancel_work_sync(&vb->update_balloon_stats_work); remove_common(vb); +#ifdef CONFIG_BALLOON_COMPACTION if (vb->vb_dev_info.inode) iput(vb->vb_dev_info.inode); + + kern_unmount(balloon_mnt); +#endif kfree(vb); } From 710531320af876192d76b2c1f68190a1df941b02 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 22 Feb 2017 15:45:58 -0800 Subject: [PATCH 047/357] mm, vmscan: cleanup lru size claculations commit fd538803731e50367b7c59ce4ad3454426a3d671 upstream. lruvec_lru_size returns the full size of the LRU list while we sometimes need a value reduced only to eligible zones (e.g. for lowmem requests). inactive_list_is_low is one such user. Later patches will add more of them. Add a new parameter to lruvec_lru_size and allow it filter out zones which are not eligible for the given context. Link: http://lkml.kernel.org/r/20170117103702.28542-2-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Johannes Weiner Acked-by: Hillf Danton Acked-by: Minchan Kim Acked-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/mmzone.h | 2 +- mm/vmscan.c | 81 ++++++++++++++++++++---------------------- mm/workingset.c | 2 +- 3 files changed, 41 insertions(+), 44 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index f99c993dd500..7e273e243a13 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -779,7 +779,7 @@ static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec) #endif } -extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru); +extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx); #ifdef CONFIG_HAVE_MEMORY_PRESENT void memory_present(int nid, unsigned long start, unsigned long end); diff --git a/mm/vmscan.c b/mm/vmscan.c index fa30010a5277..cd516c632e8f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -234,22 +234,39 @@ bool pgdat_reclaimable(struct pglist_data *pgdat) pgdat_reclaimable_pages(pgdat) * 6; } -unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru) +/** + * lruvec_lru_size - Returns the number of pages on the given LRU list. + * @lruvec: lru vector + * @lru: lru to use + * @zone_idx: zones to consider (use MAX_NR_ZONES for the whole LRU list) + */ +unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx) { + unsigned long lru_size; + int zid; + if (!mem_cgroup_disabled()) - return mem_cgroup_get_lru_size(lruvec, lru); + lru_size = mem_cgroup_get_lru_size(lruvec, lru); + else + lru_size = node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru); - return node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru); -} + for (zid = zone_idx + 1; zid < MAX_NR_ZONES; zid++) { + struct zone *zone = &lruvec_pgdat(lruvec)->node_zones[zid]; + unsigned long size; -unsigned long lruvec_zone_lru_size(struct lruvec *lruvec, enum lru_list lru, - int zone_idx) -{ - if (!mem_cgroup_disabled()) - return mem_cgroup_get_zone_lru_size(lruvec, lru, zone_idx); + if (!managed_zone(zone)) + continue; + + if (!mem_cgroup_disabled()) + size = mem_cgroup_get_zone_lru_size(lruvec, lru, zid); + else + size = zone_page_state(&lruvec_pgdat(lruvec)->node_zones[zid], + NR_ZONE_LRU_BASE + lru); + lru_size -= min(size, lru_size); + } + + return lru_size; - return zone_page_state(&lruvec_pgdat(lruvec)->node_zones[zone_idx], - NR_ZONE_LRU_BASE + lru); } /* @@ -2028,11 +2045,10 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, struct scan_control *sc) { unsigned long inactive_ratio; - unsigned long inactive; - unsigned long active; + unsigned long inactive, active; + enum lru_list inactive_lru = file * LRU_FILE; + enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE; unsigned long gb; - struct pglist_data *pgdat = lruvec_pgdat(lruvec); - int zid; /* * If we don't have swap space, anonymous page deactivation @@ -2041,27 +2057,8 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, if (!file && !total_swap_pages) return false; - inactive = lruvec_lru_size(lruvec, file * LRU_FILE); - active = lruvec_lru_size(lruvec, file * LRU_FILE + LRU_ACTIVE); - - /* - * For zone-constrained allocations, it is necessary to check if - * deactivations are required for lowmem to be reclaimed. This - * calculates the inactive/active pages available in eligible zones. - */ - for (zid = sc->reclaim_idx + 1; zid < MAX_NR_ZONES; zid++) { - struct zone *zone = &pgdat->node_zones[zid]; - unsigned long inactive_zone, active_zone; - - if (!managed_zone(zone)) - continue; - - inactive_zone = lruvec_zone_lru_size(lruvec, file * LRU_FILE, zid); - active_zone = lruvec_zone_lru_size(lruvec, (file * LRU_FILE) + LRU_ACTIVE, zid); - - inactive -= min(inactive, inactive_zone); - active -= min(active, active_zone); - } + inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx); + active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx); gb = (inactive + active) >> (30 - PAGE_SHIFT); if (gb) @@ -2208,7 +2205,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * system is under heavy pressure. */ if (!inactive_list_is_low(lruvec, true, sc) && - lruvec_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority) { + lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES) >> sc->priority) { scan_balance = SCAN_FILE; goto out; } @@ -2234,10 +2231,10 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * anon in [0], file in [1] */ - anon = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON) + - lruvec_lru_size(lruvec, LRU_INACTIVE_ANON); - file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE) + - lruvec_lru_size(lruvec, LRU_INACTIVE_FILE); + anon = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, MAX_NR_ZONES) + + lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, MAX_NR_ZONES); + file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES) + + lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES); spin_lock_irq(&pgdat->lru_lock); if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { @@ -2275,7 +2272,7 @@ out: unsigned long size; unsigned long scan; - size = lruvec_lru_size(lruvec, lru); + size = lruvec_lru_size(lruvec, lru, MAX_NR_ZONES); scan = size >> sc->priority; if (!scan && pass && force_scan) diff --git a/mm/workingset.c b/mm/workingset.c index fb1f9183d89a..33f6f4db32fd 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -266,7 +266,7 @@ bool workingset_refault(void *shadow) } lruvec = mem_cgroup_lruvec(pgdat, memcg); refault = atomic_long_read(&lruvec->inactive_age); - active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE); + active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES); rcu_read_unlock(); /* From 521e92b198a89d3186930578e036d3b58a767081 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 22 Feb 2017 15:46:01 -0800 Subject: [PATCH 048/357] mm, vmscan: consider eligible zones in get_scan_count commit 71ab6cfe88dcf9f6e6a65eb85cf2bda20a257682 upstream. get_scan_count() considers the whole node LRU size when - doing SCAN_FILE due to many page cache inactive pages - calculating the number of pages to scan In both cases this might lead to unexpected behavior especially on 32b systems where we can expect lowmem memory pressure very often. A large highmem zone can easily distort SCAN_FILE heuristic because there might be only few file pages from the eligible zones on the node lru and we would still enforce file lru scanning which can lead to trashing while we could still scan anonymous pages. The later use of lruvec_lru_size can be problematic as well. Especially when there are not many pages from the eligible zones. We would have to skip over many pages to find anything to reclaim but shrink_node_memcg would only reduce the remaining number to scan by SWAP_CLUSTER_MAX at maximum. Therefore we can end up going over a large LRU many times without actually having chance to reclaim much if anything at all. The closer we are out of memory on lowmem zone the worse the problem will be. Fix this by filtering out all the ineligible zones when calculating the lru size for both paths and consider only sc->reclaim_idx zones. The patch would need to be tweaked a bit to apply to 4.10 and older but I will do that as soon as it hits the Linus tree in the next merge window. Link: http://lkml.kernel.org/r/20170117103702.28542-3-mhocko@kernel.org Fixes: b2e18757f2c9 ("mm, vmscan: begin reclaiming pages on a per-node basis") Signed-off-by: Michal Hocko Tested-by: Trevor Cordes Acked-by: Minchan Kim Acked-by: Hillf Danton Acked-by: Mel Gorman Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/vmscan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index cd516c632e8f..30a88b945a44 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2205,7 +2205,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * system is under heavy pressure. */ if (!inactive_list_is_low(lruvec, true, sc) && - lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES) >> sc->priority) { + lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) { scan_balance = SCAN_FILE; goto out; } @@ -2272,7 +2272,7 @@ out: unsigned long size; unsigned long scan; - size = lruvec_lru_size(lruvec, lru, MAX_NR_ZONES); + size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx); scan = size >> sc->priority; if (!scan && pass && force_scan) From 6d94a6b32e97c56dfbe96cc489517b338608f091 Mon Sep 17 00:00:00 2001 From: Stas Sergeev Date: Mon, 27 Feb 2017 14:27:25 -0800 Subject: [PATCH 049/357] sigaltstack: support SS_AUTODISARM for CONFIG_COMPAT commit 441398d378f29a5ad6d0fcda07918e54e4961800 upstream. Currently SS_AUTODISARM is not supported in compatibility mode, but does not return -EINVAL either. This makes dosemu built with -m32 on x86_64 to crash. Also the kernel's sigaltstack selftest fails if compiled with -m32. This patch adds the needed support. Link: http://lkml.kernel.org/r/20170205101213.8163-2-stsp@list.ru Signed-off-by: Stas Sergeev Cc: Milosz Tanski Cc: Andy Lutomirski Cc: Al Viro Cc: Arnd Bergmann Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Oleg Nesterov Cc: Nicolas Pitre Cc: Waiman Long Cc: Dave Hansen Cc: Dmitry Safonov Cc: Wang Xiaoqiang Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/compat.h | 4 +++- kernel/signal.c | 11 +++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/include/linux/compat.h b/include/linux/compat.h index 63609398ef9f..d8535a430caf 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -711,8 +711,10 @@ int __compat_save_altstack(compat_stack_t __user *, unsigned long); compat_stack_t __user *__uss = uss; \ struct task_struct *t = current; \ put_user_ex(ptr_to_compat((void __user *)t->sas_ss_sp), &__uss->ss_sp); \ - put_user_ex(sas_ss_flags(sp), &__uss->ss_flags); \ + put_user_ex(t->sas_ss_flags, &__uss->ss_flags); \ put_user_ex(t->sas_ss_size, &__uss->ss_size); \ + if (t->sas_ss_flags & SS_AUTODISARM) \ + sas_ss_reset(t); \ } while (0); asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, diff --git a/kernel/signal.c b/kernel/signal.c index 75761acc77cf..0b1415720a15 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3226,10 +3226,17 @@ int compat_restore_altstack(const compat_stack_t __user *uss) int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp) { + int err; struct task_struct *t = current; - return __put_user(ptr_to_compat((void __user *)t->sas_ss_sp), &uss->ss_sp) | - __put_user(sas_ss_flags(sp), &uss->ss_flags) | + err = __put_user(ptr_to_compat((void __user *)t->sas_ss_sp), + &uss->ss_sp) | + __put_user(t->sas_ss_flags, &uss->ss_flags) | __put_user(t->sas_ss_size, &uss->ss_size); + if (err) + return err; + if (t->sas_ss_flags & SS_AUTODISARM) + sas_ss_reset(t); + return 0; } #endif From 270e84a1e6effd6c0c6e9b13b196b5fdaa392954 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 27 Feb 2017 14:28:24 -0800 Subject: [PATCH 050/357] ipc/shm: Fix shmat mmap nil-page protection commit 95e91b831f87ac8e1f8ed50c14d709089b4e01b8 upstream. The issue is described here, with a nice testcase: https://bugzilla.kernel.org/show_bug.cgi?id=192931 The problem is that shmat() calls do_mmap_pgoff() with MAP_FIXED, and the address rounded down to 0. For the regular mmap case, the protection mentioned above is that the kernel gets to generate the address -- arch_get_unmapped_area() will always check for MAP_FIXED and return that address. So by the time we do security_mmap_addr(0) things get funky for shmat(). The testcase itself shows that while a regular user crashes, root will not have a problem attaching a nil-page. There are two possible fixes to this. The first, and which this patch does, is to simply allow root to crash as well -- this is also regular mmap behavior, ie when hacking up the testcase and adding mmap(... |MAP_FIXED). While this approach is the safer option, the second alternative is to ignore SHM_RND if the rounded address is 0, thus only having MAP_SHARED flags. This makes the behavior of shmat() identical to the mmap() case. The downside of this is obviously user visible, but does make sense in that it maintains semantics after the round-down wrt 0 address and mmap. Passes shm related ltp tests. Link: http://lkml.kernel.org/r/1486050195-18629-1-git-send-email-dave@stgolabs.net Signed-off-by: Davidlohr Bueso Reported-by: Gareth Evans Cc: Manfred Spraul Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- ipc/shm.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/ipc/shm.c b/ipc/shm.c index dbac8860c721..e2072ae4f90e 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -1085,8 +1085,8 @@ out_unlock1: * "raddr" thing points to kernel space, and there has to be a wrapper around * this. */ -long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, - unsigned long shmlba) +long do_shmat(int shmid, char __user *shmaddr, int shmflg, + ulong *raddr, unsigned long shmlba) { struct shmid_kernel *shp; unsigned long addr; @@ -1107,8 +1107,13 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, goto out; else if ((addr = (ulong)shmaddr)) { if (addr & (shmlba - 1)) { - if (shmflg & SHM_RND) - addr &= ~(shmlba - 1); /* round down */ + /* + * Round down to the nearest multiple of shmlba. + * For sane do_mmap_pgoff() parameters, avoid + * round downs that trigger nil-page and MAP_FIXED. + */ + if ((shmflg & SHM_RND) && addr >= shmlba) + addr &= ~(shmlba - 1); else #ifndef __ARCH_FORCE_SHMLBA if (addr & ~PAGE_MASK) From d9cc31683a16f5619217d80c3d8e608c23c41afc Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Tue, 17 Jan 2017 06:45:41 -0500 Subject: [PATCH 051/357] ima: fix ima_d_path() possible race with rename commit bc15ed663e7e53ee4dc3e60f8d09c93a0528c694 upstream. On failure to return a pathname from ima_d_path(), a pointer to dname is returned, which is subsequently used in the IMA measurement list, the IMA audit records, and other audit logging. Saving the pointer to dname for later use has the potential to race with rename. Intead of returning a pointer to dname on failure, this patch returns a pointer to a copy of the filename. Reported-by: Al Viro Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman --- security/integrity/ima/ima.h | 2 +- security/integrity/ima/ima_api.c | 20 ++++++++++++++++++-- security/integrity/ima/ima_main.c | 8 +++++--- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index db25f54a04fe..df7834aa1b8f 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -173,7 +173,7 @@ int ima_store_template(struct ima_template_entry *entry, int violation, struct inode *inode, const unsigned char *filename, int pcr); void ima_free_template_entry(struct ima_template_entry *entry); -const char *ima_d_path(const struct path *path, char **pathbuf); +const char *ima_d_path(const struct path *path, char **pathbuf, char *filename); /* IMA policy related functions */ int ima_match_policy(struct inode *inode, enum ima_hooks func, int mask, diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c index 9df26a2b75ba..d01a52f8f708 100644 --- a/security/integrity/ima/ima_api.c +++ b/security/integrity/ima/ima_api.c @@ -318,7 +318,17 @@ void ima_audit_measurement(struct integrity_iint_cache *iint, iint->flags |= IMA_AUDITED; } -const char *ima_d_path(const struct path *path, char **pathbuf) +/* + * ima_d_path - return a pointer to the full pathname + * + * Attempt to return a pointer to the full pathname for use in the + * IMA measurement list, IMA audit records, and auditing logs. + * + * On failure, return a pointer to a copy of the filename, not dname. + * Returning a pointer to dname, could result in using the pointer + * after the memory has been freed. + */ +const char *ima_d_path(const struct path *path, char **pathbuf, char *namebuf) { char *pathname = NULL; @@ -331,5 +341,11 @@ const char *ima_d_path(const struct path *path, char **pathbuf) pathname = NULL; } } - return pathname ?: (const char *)path->dentry->d_name.name; + + if (!pathname) { + strlcpy(namebuf, path->dentry->d_name.name, NAME_MAX); + pathname = namebuf; + } + + return pathname; } diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 423d111b3b94..0e8762945e79 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -83,6 +83,7 @@ static void ima_rdwr_violation_check(struct file *file, const char **pathname) { struct inode *inode = file_inode(file); + char filename[NAME_MAX]; fmode_t mode = file->f_mode; bool send_tomtou = false, send_writers = false; @@ -102,7 +103,7 @@ static void ima_rdwr_violation_check(struct file *file, if (!send_tomtou && !send_writers) return; - *pathname = ima_d_path(&file->f_path, pathbuf); + *pathname = ima_d_path(&file->f_path, pathbuf, filename); if (send_tomtou) ima_add_violation(file, *pathname, iint, @@ -161,6 +162,7 @@ static int process_measurement(struct file *file, char *buf, loff_t size, struct integrity_iint_cache *iint = NULL; struct ima_template_desc *template_desc; char *pathbuf = NULL; + char filename[NAME_MAX]; const char *pathname = NULL; int rc = -ENOMEM, action, must_appraise; int pcr = CONFIG_IMA_MEASURE_PCR_IDX; @@ -239,8 +241,8 @@ static int process_measurement(struct file *file, char *buf, loff_t size, goto out_digsig; } - if (!pathname) /* ima_rdwr_violation possibly pre-fetched */ - pathname = ima_d_path(&file->f_path, &pathbuf); + if (!pathbuf) /* ima_rdwr_violation possibly pre-fetched */ + pathname = ima_d_path(&file->f_path, &pathbuf, filename); if (action & IMA_MEASURE) ima_store_measurement(iint, file, pathname, From 2294b771a4b425d9365aabeaff8d33ccd0c0fffe Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Tue, 31 Jan 2017 15:38:16 +0900 Subject: [PATCH 052/357] PM / devfreq: Fix available_governor sysfs commit bcf23c79c4e46130701370af4383b61a3cba755c upstream. The devfreq using passive governor is not able to change the governor. So, the user can not change the governor through 'available_governor' sysfs entry. Also, the devfreq which don't use the passive governor is not able to change to 'passive' governor on the fly. Fixes: 996133119f57 ("PM / devfreq: Add new passive governor") Signed-off-by: Chanwoo Choi Signed-off-by: MyungJoo Ham Signed-off-by: Greg Kroah-Hartman --- drivers/devfreq/devfreq.c | 31 ++++++++++++++++++++++++++---- drivers/devfreq/governor_passive.c | 1 + include/linux/devfreq.h | 3 +++ 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 712592cef1a2..2eacfb4b905f 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -939,6 +939,9 @@ static ssize_t governor_store(struct device *dev, struct device_attribute *attr, if (df->governor == governor) { ret = 0; goto out; + } else if (df->governor->immutable || governor->immutable) { + ret = -EINVAL; + goto out; } if (df->governor) { @@ -968,13 +971,33 @@ static ssize_t available_governors_show(struct device *d, struct device_attribute *attr, char *buf) { - struct devfreq_governor *tmp_governor; + struct devfreq *df = to_devfreq(d); ssize_t count = 0; mutex_lock(&devfreq_list_lock); - list_for_each_entry(tmp_governor, &devfreq_governor_list, node) - count += scnprintf(&buf[count], (PAGE_SIZE - count - 2), - "%s ", tmp_governor->name); + + /* + * The devfreq with immutable governor (e.g., passive) shows + * only own governor. + */ + if (df->governor->immutable) { + count = scnprintf(&buf[count], DEVFREQ_NAME_LEN, + "%s ", df->governor_name); + /* + * The devfreq device shows the registered governor except for + * immutable governors such as passive governor . + */ + } else { + struct devfreq_governor *governor; + + list_for_each_entry(governor, &devfreq_governor_list, node) { + if (governor->immutable) + continue; + count += scnprintf(&buf[count], (PAGE_SIZE - count - 2), + "%s ", governor->name); + } + } + mutex_unlock(&devfreq_list_lock); /* Truncate the trailing space */ diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c index 9ef46e2592c4..93795b32dc09 100644 --- a/drivers/devfreq/governor_passive.c +++ b/drivers/devfreq/governor_passive.c @@ -179,6 +179,7 @@ static int devfreq_passive_event_handler(struct devfreq *devfreq, static struct devfreq_governor devfreq_passive = { .name = "passive", + .immutable = 1, .get_target_freq = devfreq_passive_get_target_freq, .event_handler = devfreq_passive_event_handler, }; diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 2de4e2eea180..e0acb0e5243b 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -104,6 +104,8 @@ struct devfreq_dev_profile { * struct devfreq_governor - Devfreq policy governor * @node: list node - contains registered devfreq governors * @name: Governor's name + * @immutable: Immutable flag for governor. If the value is 1, + * this govenror is never changeable to other governor. * @get_target_freq: Returns desired operating frequency for the device. * Basically, get_target_freq will run * devfreq_dev_profile.get_dev_status() to get the @@ -121,6 +123,7 @@ struct devfreq_governor { struct list_head node; const char name[DEVFREQ_NAME_LEN]; + const unsigned int immutable; int (*get_target_freq)(struct devfreq *this, unsigned long *freq); int (*event_handler)(struct devfreq *devfreq, unsigned int event, void *data); From fe8f92c7beba2f1c6b1235e52e199eafbe4aa39a Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Tue, 31 Jan 2017 15:38:17 +0900 Subject: [PATCH 053/357] PM / devfreq: Fix wrong trans_stat of passive devfreq device commit 30582c25a4b4e0a5e456a309fde79b845e9473b2 upstream. Until now, the trans_stat information of passive devfreq is not updated. This patch updates the trans_stat information after setting the target frequency of passive devfreq device. Fixes: 996133119f57 ("PM / devfreq: Add new passive governor") Signed-off-by: Chanwoo Choi Signed-off-by: MyungJoo Ham Signed-off-by: Greg Kroah-Hartman --- drivers/devfreq/devfreq.c | 3 ++- drivers/devfreq/governor.h | 2 ++ drivers/devfreq/governor_passive.c | 5 +++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 2eacfb4b905f..7309c0824887 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -130,7 +130,7 @@ static void devfreq_set_freq_table(struct devfreq *devfreq) * @devfreq: the devfreq instance * @freq: the update target frequency */ -static int devfreq_update_status(struct devfreq *devfreq, unsigned long freq) +int devfreq_update_status(struct devfreq *devfreq, unsigned long freq) { int lev, prev_lev, ret = 0; unsigned long cur_time; @@ -166,6 +166,7 @@ out: devfreq->last_stat_updated = cur_time; return ret; } +EXPORT_SYMBOL(devfreq_update_status); /** * find_devfreq_governor() - find devfreq governor from name diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h index fad7d6321978..71576b8bdfef 100644 --- a/drivers/devfreq/governor.h +++ b/drivers/devfreq/governor.h @@ -38,4 +38,6 @@ extern void devfreq_interval_update(struct devfreq *devfreq, extern int devfreq_add_governor(struct devfreq_governor *governor); extern int devfreq_remove_governor(struct devfreq_governor *governor); +extern int devfreq_update_status(struct devfreq *devfreq, unsigned long freq); + #endif /* _GOVERNOR_H */ diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c index 93795b32dc09..5be96b2249e7 100644 --- a/drivers/devfreq/governor_passive.c +++ b/drivers/devfreq/governor_passive.c @@ -112,6 +112,11 @@ static int update_devfreq_passive(struct devfreq *devfreq, unsigned long freq) if (ret < 0) goto out; + if (devfreq->profile->freq_table + && (devfreq_update_status(devfreq, freq))) + dev_err(&devfreq->dev, + "Couldn't update frequency transition information.\n"); + devfreq->previous_freq = freq; out: From 9987feba902cbd6429cf3b3a3b6614aa23a555c2 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Thu, 9 Feb 2017 11:46:18 -0500 Subject: [PATCH 054/357] dm cache: fix corruption seen when using cache > 2TB commit ca763d0a53b264a650342cee206512bc92ac7050 upstream. A rounding bug due to compiler generated temporary being 32bit was found in remap_to_cache(). A localized cast in remap_to_cache() fixes the corruption but this preferred fix (changing from uint32_t to sector_t) eliminates potential for future rounding errors elsewhere. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-target.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 59b2c50562e4..c817627d09ca 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -248,7 +248,7 @@ struct cache { /* * Fields for converting from sectors to blocks. */ - uint32_t sectors_per_block; + sector_t sectors_per_block; int sectors_per_block_shift; spinlock_t lock; @@ -3546,11 +3546,11 @@ static void cache_status(struct dm_target *ti, status_type_t type, residency = policy_residency(cache->policy); - DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ", + DMEMIT("%u %llu/%llu %llu %llu/%llu %u %u %u %u %u %u %lu ", (unsigned)DM_CACHE_METADATA_BLOCK_SIZE, (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata), (unsigned long long)nr_blocks_metadata, - cache->sectors_per_block, + (unsigned long long)cache->sectors_per_block, (unsigned long long) from_cblock(residency), (unsigned long long) from_cblock(cache->cache_size), (unsigned) atomic_read(&cache->stats.read_hit), From bad6c16b81b68d81e95ada246649dacc3cdd5e1d Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 15 Feb 2017 12:06:19 -0500 Subject: [PATCH 055/357] dm stats: fix a leaked s->histogram_boundaries array commit 6085831883c25860264721df15f05bbded45e2a2 upstream. Fixes: dfcfac3e4cd9 ("dm stats: collect and report histogram of IO latencies") Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-stats.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index 38b05f23b96c..0250e7e521ab 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -175,6 +175,7 @@ static void dm_stat_free(struct rcu_head *head) int cpu; struct dm_stat *s = container_of(head, struct dm_stat, rcu_head); + kfree(s->histogram_boundaries); kfree(s->program_id); kfree(s->aux_data); for_each_possible_cpu(cpu) { From b7f874eedc9308a43b0d29e2a12f848be724fca7 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 16 Feb 2017 23:57:17 -0500 Subject: [PATCH 056/357] dm round robin: revert "use percpu 'repeat_count' and 'current_path'" commit 37a098e9d10db6e2efc05fe61e3a6ff2e9802c53 upstream. The sloppy nature of lockless access to percpu pointers (s->current_path) in rr_select_path(), from multiple threads, is causing some paths to used more than others -- which results in less IO performance being observed. Revert these upstream commits to restore truly symmetric round-robin IO submission in DM multipath: b0b477c dm round robin: use percpu 'repeat_count' and 'current_path' 802934b dm round robin: do not use this_cpu_ptr() without having preemption disabled There is no benefit to all this complexity if repeat_count = 1 (which is the recommended default). Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-round-robin.c | 67 ++++++++----------------------------- 1 file changed, 14 insertions(+), 53 deletions(-) diff --git a/drivers/md/dm-round-robin.c b/drivers/md/dm-round-robin.c index 6c25213ab38c..bdbb7e6e8212 100644 --- a/drivers/md/dm-round-robin.c +++ b/drivers/md/dm-round-robin.c @@ -17,8 +17,8 @@ #include #define DM_MSG_PREFIX "multipath round-robin" -#define RR_MIN_IO 1000 -#define RR_VERSION "1.1.0" +#define RR_MIN_IO 1 +#define RR_VERSION "1.2.0" /*----------------------------------------------------------------- * Path-handling code, paths are held in lists @@ -47,44 +47,19 @@ struct selector { struct list_head valid_paths; struct list_head invalid_paths; spinlock_t lock; - struct dm_path * __percpu *current_path; - struct percpu_counter repeat_count; }; -static void set_percpu_current_path(struct selector *s, struct dm_path *path) -{ - int cpu; - - for_each_possible_cpu(cpu) - *per_cpu_ptr(s->current_path, cpu) = path; -} - static struct selector *alloc_selector(void) { struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL); - if (!s) - return NULL; - - INIT_LIST_HEAD(&s->valid_paths); - INIT_LIST_HEAD(&s->invalid_paths); - spin_lock_init(&s->lock); - - s->current_path = alloc_percpu(struct dm_path *); - if (!s->current_path) - goto out_current_path; - set_percpu_current_path(s, NULL); - - if (percpu_counter_init(&s->repeat_count, 0, GFP_KERNEL)) - goto out_repeat_count; + if (s) { + INIT_LIST_HEAD(&s->valid_paths); + INIT_LIST_HEAD(&s->invalid_paths); + spin_lock_init(&s->lock); + } return s; - -out_repeat_count: - free_percpu(s->current_path); -out_current_path: - kfree(s); - return NULL;; } static int rr_create(struct path_selector *ps, unsigned argc, char **argv) @@ -105,8 +80,6 @@ static void rr_destroy(struct path_selector *ps) free_paths(&s->valid_paths); free_paths(&s->invalid_paths); - free_percpu(s->current_path); - percpu_counter_destroy(&s->repeat_count); kfree(s); ps->context = NULL; } @@ -157,6 +130,11 @@ static int rr_add_path(struct path_selector *ps, struct dm_path *path, return -EINVAL; } + if (repeat_count > 1) { + DMWARN_LIMIT("repeat_count > 1 is deprecated, using 1 instead"); + repeat_count = 1; + } + /* allocate the path */ pi = kmalloc(sizeof(*pi), GFP_KERNEL); if (!pi) { @@ -183,9 +161,6 @@ static void rr_fail_path(struct path_selector *ps, struct dm_path *p) struct path_info *pi = p->pscontext; spin_lock_irqsave(&s->lock, flags); - if (p == *this_cpu_ptr(s->current_path)) - set_percpu_current_path(s, NULL); - list_move(&pi->list, &s->invalid_paths); spin_unlock_irqrestore(&s->lock, flags); } @@ -208,29 +183,15 @@ static struct dm_path *rr_select_path(struct path_selector *ps, size_t nr_bytes) unsigned long flags; struct selector *s = ps->context; struct path_info *pi = NULL; - struct dm_path *current_path = NULL; - local_irq_save(flags); - current_path = *this_cpu_ptr(s->current_path); - if (current_path) { - percpu_counter_dec(&s->repeat_count); - if (percpu_counter_read_positive(&s->repeat_count) > 0) { - local_irq_restore(flags); - return current_path; - } - } - - spin_lock(&s->lock); + spin_lock_irqsave(&s->lock, flags); if (!list_empty(&s->valid_paths)) { pi = list_entry(s->valid_paths.next, struct path_info, list); list_move_tail(&pi->list, &s->valid_paths); - percpu_counter_set(&s->repeat_count, pi->repeat_count); - set_percpu_current_path(s, pi->path); - current_path = pi->path; } spin_unlock_irqrestore(&s->lock, flags); - return current_path; + return pi ? pi->path : NULL; } static struct path_selector_type rr_ps = { From 2937e22c2314cdfe31d82e2cc36af4be863be978 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Tue, 28 Feb 2017 19:17:49 +0100 Subject: [PATCH 057/357] dm raid: fix data corruption on reshape request commit d36a19541fe8f392778ac137d60f9be8dfdd8f9d upstream. The lvm2 sequence to manage dm-raid constructor flags that trigger a rebuild or a reshape is defined as: 1) load table with flags (e.g. rebuild/delta_disks/data_offset) 2) clear out the flags in lvm2 metadata 3) store the lvm2 metadata, reload the table to reset the flags previously established during the initial load (1) -- in order to prevent repeatedly requesting a rebuild or a reshape on activation Currently, loading an inactive table with rebuild/reshape flags specified will cause dm-raid to rebuild/reshape on resume and thus start updating the raid metadata (about the progress). When the second table reload, to reset the flags, occurs the constructor accesses the volatile progress state kept in the raid superblocks. Because the active mapping is still processing the rebuild/reshape, that position will be stale by the time the device is resumed. In the reshape case, this causes data corruption by processing already reshaped stripes again. In the rebuild case, it does _not_ cause data corruption but instead involves superfluous rebuilds. Fix by keeping the raid set frozen during the first resume and then allow the rebuild/reshape during the second resume. Fixes: 9dbd1aa3a ("dm raid: add reshaping support to the target") Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-raid.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index af2d79b52484..15daa36fcea6 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3621,6 +3621,8 @@ static int raid_preresume(struct dm_target *ti) return r; } +#define RESUME_STAY_FROZEN_FLAGS (CTR_FLAG_DELTA_DISKS | CTR_FLAG_DATA_OFFSET) + static void raid_resume(struct dm_target *ti) { struct raid_set *rs = ti->private; @@ -3638,7 +3640,15 @@ static void raid_resume(struct dm_target *ti) mddev->ro = 0; mddev->in_sync = 0; - clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + /* + * Keep the RAID set frozen if reshape/rebuild flags are set. + * The RAID set is unfrozen once the next table load/resume, + * which clears the reshape/rebuild flags, occurs. + * This ensures that the constructor for the inactive table + * retrieves an up-to-date reshape_position. + */ + if (!(rs->ctr_flags & RESUME_STAY_FROZEN_FLAGS)) + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); if (mddev->suspended) mddev_resume(mddev); From 27f5ef378d2d56dad3c1a4c93c27fc5ce7c6eda0 Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 14 Dec 2016 18:46:01 -0800 Subject: [PATCH 058/357] scsi: storvsc: use tagged SRB requests if supported by the device commit 3cd6d3d9b1abab8dcdf0800224ce26daac24eea2 upstream. Properly set SRB flags when hosting device supports tagged queuing. This patch improves the performance on Fiber Channel disks. Signed-off-by: Long Li Reviewed-by: K. Y. Srinivasan Signed-off-by: K. Y. Srinivasan Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/storvsc_drv.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 8ccfc9ea874b..c193629ac060 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -136,6 +136,8 @@ struct hv_fc_wwn_packet { #define SRB_FLAGS_PORT_DRIVER_RESERVED 0x0F000000 #define SRB_FLAGS_CLASS_DRIVER_RESERVED 0xF0000000 +#define SP_UNTAGGED ((unsigned char) ~0) +#define SRB_SIMPLE_TAG_REQUEST 0x20 /* * Platform neutral description of a scsi request - @@ -1451,6 +1453,13 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) vm_srb->win8_extension.srb_flags |= SRB_FLAGS_DISABLE_SYNCH_TRANSFER; + if (scmnd->device->tagged_supported) { + vm_srb->win8_extension.srb_flags |= + (SRB_FLAGS_QUEUE_ACTION_ENABLE | SRB_FLAGS_NO_QUEUE_FREEZE); + vm_srb->win8_extension.queue_tag = SP_UNTAGGED; + vm_srb->win8_extension.queue_action = SRB_SIMPLE_TAG_REQUEST; + } + /* Build the SRB */ switch (scmnd->sc_data_direction) { case DMA_TO_DEVICE: From e59693753e08c70a4da76e951ba4279003c1228e Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 14 Dec 2016 18:46:02 -0800 Subject: [PATCH 059/357] scsi: storvsc: properly handle SRB_ERROR when sense message is present commit bba5dc332ec2d3a685cb4dae668c793f6a3713a3 upstream. When sense message is present on error, we should pass along to the upper layer to decide how to deal with the error. This patch fixes connectivity issues with Fiber Channel devices. Signed-off-by: Long Li Reviewed-by: K. Y. Srinivasan Signed-off-by: K. Y. Srinivasan Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/storvsc_drv.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index c193629ac060..5d71a661ee67 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -890,6 +890,13 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, switch (SRB_STATUS(vm_srb->srb_status)) { case SRB_STATUS_ERROR: + /* + * Let upper layer deal with error when + * sense message is present. + */ + + if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) + break; /* * If there is an error; offline the device since all * error recovery strategies would have already been From a50781fe6edabe33ecdef862fd9e7f2f35277de6 Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 14 Dec 2016 18:46:03 -0800 Subject: [PATCH 060/357] scsi: storvsc: properly set residual data length on errors commit 40630f462824ee24bc00d692865c86c3828094e0 upstream. On I/O errors, the Windows driver doesn't set data_transfer_length on error conditions other than SRB_STATUS_DATA_OVERRUN. In these cases we need to set data_transfer_length to 0, indicating there is no data transferred. On SRB_STATUS_DATA_OVERRUN, data_transfer_length is set by the Windows driver to the actual data transferred. Reported-by: Shiva Krishna Signed-off-by: Long Li Reviewed-by: K. Y. Srinivasan Signed-off-by: K. Y. Srinivasan Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/storvsc_drv.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 5d71a661ee67..3f218f5cf29b 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -377,6 +377,7 @@ enum storvsc_request_type { #define SRB_STATUS_SUCCESS 0x01 #define SRB_STATUS_ABORTED 0x02 #define SRB_STATUS_ERROR 0x04 +#define SRB_STATUS_DATA_OVERRUN 0x12 #define SRB_STATUS(status) \ (status & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN)) @@ -962,6 +963,7 @@ static void storvsc_command_completion(struct storvsc_cmd_request *cmd_request, struct scsi_cmnd *scmnd = cmd_request->cmd; struct scsi_sense_hdr sense_hdr; struct vmscsi_request *vm_srb; + u32 data_transfer_length; struct Scsi_Host *host; u32 payload_sz = cmd_request->payload_sz; void *payload = cmd_request->payload; @@ -969,6 +971,7 @@ static void storvsc_command_completion(struct storvsc_cmd_request *cmd_request, host = stor_dev->host; vm_srb = &cmd_request->vstor_packet.vm_srb; + data_transfer_length = vm_srb->data_transfer_length; scmnd->result = vm_srb->scsi_status; @@ -982,13 +985,20 @@ static void storvsc_command_completion(struct storvsc_cmd_request *cmd_request, &sense_hdr); } - if (vm_srb->srb_status != SRB_STATUS_SUCCESS) + if (vm_srb->srb_status != SRB_STATUS_SUCCESS) { storvsc_handle_error(vm_srb, scmnd, host, sense_hdr.asc, sense_hdr.ascq); + /* + * The Windows driver set data_transfer_length on + * SRB_STATUS_DATA_OVERRUN. On other errors, this value + * is untouched. In these cases we set it to 0. + */ + if (vm_srb->srb_status != SRB_STATUS_DATA_OVERRUN) + data_transfer_length = 0; + } scsi_set_resid(scmnd, - cmd_request->payload->range.len - - vm_srb->data_transfer_length); + cmd_request->payload->range.len - data_transfer_length); scmnd->scsi_done(scmnd); From 73f5176ecac265b03dcd8ef0aaed6950e5ecb828 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:21 -0800 Subject: [PATCH 061/357] scsi: aacraid: Reorder Adapter status check commit c421530bf848604e97d0785a03b3fe2c62775083 upstream. The driver currently checks the SELF_TEST_FAILED first and then KERNEL_PANIC next. Under error conditions(boot code failure) both SELF_TEST_FAILED and KERNEL_PANIC can be set at the same time. The driver has the capability to reset the controller on an KERNEL_PANIC, but not on SELF_TEST_FAILED. Fixed by first checking KERNEL_PANIC and then the others. Fixes: e8b12f0fb835223752 ([SCSI] aacraid: Add new code for PMC-Sierra's SRC base controller family) Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/aacraid/src.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 0c453880f214..7b178d765726 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -413,17 +413,24 @@ static int aac_src_check_health(struct aac_dev *dev) { u32 status = src_readl(dev, MUnit.OMR); - /* - * Check to see if the board failed any self tests. - */ - if (unlikely(status & SELF_TEST_FAILED)) - return -1; - /* * Check to see if the board panic'd. */ if (unlikely(status & KERNEL_PANIC)) - return (status >> 16) & 0xFF; + goto err_blink; + + /* + * Check to see if the board failed any self tests. + */ + if (unlikely(status & SELF_TEST_FAILED)) + goto err_out; + + /* + * Check to see if the board failed any self tests. + */ + if (unlikely(status & MONITOR_PANIC)) + goto err_out; + /* * Wait for the adapter to be up and running. */ @@ -433,6 +440,12 @@ static int aac_src_check_health(struct aac_dev *dev) * Everything is OK */ return 0; + +err_out: + return -1; + +err_blink: + return (status > 16) & 0xFF; } /** From e9dc8334d765f7910114921c63a0324272f1f636 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 17 Feb 2017 09:02:45 +0100 Subject: [PATCH 062/357] scsi: use 'scsi_device_from_queue()' for scsi_dh commit 857de6e00778738dc3d61f75acbac35bdc48e533 upstream. The device handler needs to check if a given queue belongs to a scsi device; only then does it make sense to attach a device handler. [mkp: dropped flags] Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_dh.c | 22 ++++------------------ drivers/scsi/scsi_lib.c | 23 +++++++++++++++++++++++ include/scsi/scsi_device.h | 1 + 3 files changed, 28 insertions(+), 18 deletions(-) diff --git a/drivers/scsi/scsi_dh.c b/drivers/scsi/scsi_dh.c index b8d3b97b217a..84addee05be6 100644 --- a/drivers/scsi/scsi_dh.c +++ b/drivers/scsi/scsi_dh.c @@ -219,20 +219,6 @@ int scsi_unregister_device_handler(struct scsi_device_handler *scsi_dh) } EXPORT_SYMBOL_GPL(scsi_unregister_device_handler); -static struct scsi_device *get_sdev_from_queue(struct request_queue *q) -{ - struct scsi_device *sdev; - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - sdev = q->queuedata; - if (!sdev || !get_device(&sdev->sdev_gendev)) - sdev = NULL; - spin_unlock_irqrestore(q->queue_lock, flags); - - return sdev; -} - /* * scsi_dh_activate - activate the path associated with the scsi_device * corresponding to the given request queue. @@ -251,7 +237,7 @@ int scsi_dh_activate(struct request_queue *q, activate_complete fn, void *data) struct scsi_device *sdev; int err = SCSI_DH_NOSYS; - sdev = get_sdev_from_queue(q); + sdev = scsi_device_from_queue(q); if (!sdev) { if (fn) fn(data, err); @@ -298,7 +284,7 @@ int scsi_dh_set_params(struct request_queue *q, const char *params) struct scsi_device *sdev; int err = -SCSI_DH_NOSYS; - sdev = get_sdev_from_queue(q); + sdev = scsi_device_from_queue(q); if (!sdev) return err; @@ -321,7 +307,7 @@ int scsi_dh_attach(struct request_queue *q, const char *name) struct scsi_device_handler *scsi_dh; int err = 0; - sdev = get_sdev_from_queue(q); + sdev = scsi_device_from_queue(q); if (!sdev) return -ENODEV; @@ -359,7 +345,7 @@ const char *scsi_dh_attached_handler_name(struct request_queue *q, gfp_t gfp) struct scsi_device *sdev; const char *handler_name = NULL; - sdev = get_sdev_from_queue(q); + sdev = scsi_device_from_queue(q); if (!sdev) return NULL; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index e64eae4392a4..d8099c7cab00 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2127,6 +2127,29 @@ void scsi_mq_destroy_tags(struct Scsi_Host *shost) blk_mq_free_tag_set(&shost->tag_set); } +/** + * scsi_device_from_queue - return sdev associated with a request_queue + * @q: The request queue to return the sdev from + * + * Return the sdev associated with a request queue or NULL if the + * request_queue does not reference a SCSI device. + */ +struct scsi_device *scsi_device_from_queue(struct request_queue *q) +{ + struct scsi_device *sdev = NULL; + + if (q->mq_ops) { + if (q->mq_ops == &scsi_mq_ops) + sdev = q->queuedata; + } else if (q->request_fn == scsi_request_fn) + sdev = q->queuedata; + if (!sdev || !get_device(&sdev->sdev_gendev)) + sdev = NULL; + + return sdev; +} +EXPORT_SYMBOL_GPL(scsi_device_from_queue); + /* * Function: scsi_block_requests() * diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 8a9563144890..b9ec4939b80c 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -315,6 +315,7 @@ extern void scsi_remove_device(struct scsi_device *); extern int scsi_unregister_device_handler(struct scsi_device_handler *scsi_dh); void scsi_attach_vpd(struct scsi_device *sdev); +extern struct scsi_device *scsi_device_from_queue(struct request_queue *q); extern int scsi_device_get(struct scsi_device *); extern void scsi_device_put(struct scsi_device *); extern struct scsi_device *scsi_device_lookup(struct Scsi_Host *, From 206af3d97f00d5c96de5bfd5a48045d444b3eec4 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 25 Oct 2016 11:37:59 +0200 Subject: [PATCH 063/357] power: reset: at91-poweroff: timely shutdown LPDDR memories commit 0b0408745e7ff24757cbfd571d69026c0ddb803c upstream. LPDDR memories can only handle up to 400 uncontrolled power off. Ensure the proper power off sequence is used before shutting down the platform. Signed-off-by: Alexandre Belloni Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/reset/Kconfig | 2 +- drivers/power/reset/at91-poweroff.c | 54 +++++++++++++++++++++++- drivers/power/reset/at91-sama5d2_shdwc.c | 49 ++++++++++++++++++++- 3 files changed, 102 insertions(+), 3 deletions(-) diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig index c74c3f67b8da..02e46bbcf45d 100644 --- a/drivers/power/reset/Kconfig +++ b/drivers/power/reset/Kconfig @@ -32,7 +32,7 @@ config POWER_RESET_AT91_RESET config POWER_RESET_AT91_SAMA5D2_SHDWC tristate "Atmel AT91 SAMA5D2-Compatible shutdown controller driver" - depends on ARCH_AT91 || COMPILE_TEST + depends on ARCH_AT91 default SOC_SAMA5 help This driver supports the alternate shutdown controller for some Atmel diff --git a/drivers/power/reset/at91-poweroff.c b/drivers/power/reset/at91-poweroff.c index e9e24df35f26..2579f025b90b 100644 --- a/drivers/power/reset/at91-poweroff.c +++ b/drivers/power/reset/at91-poweroff.c @@ -14,9 +14,12 @@ #include #include #include +#include #include #include +#include + #define AT91_SHDW_CR 0x00 /* Shut Down Control Register */ #define AT91_SHDW_SHDW BIT(0) /* Shut Down command */ #define AT91_SHDW_KEY (0xa5 << 24) /* KEY Password */ @@ -50,6 +53,7 @@ static const char *shdwc_wakeup_modes[] = { static void __iomem *at91_shdwc_base; static struct clk *sclk; +static void __iomem *mpddrc_base; static void __init at91_wakeup_status(void) { @@ -73,6 +77,29 @@ static void at91_poweroff(void) writel(AT91_SHDW_KEY | AT91_SHDW_SHDW, at91_shdwc_base + AT91_SHDW_CR); } +static void at91_lpddr_poweroff(void) +{ + asm volatile( + /* Align to cache lines */ + ".balign 32\n\t" + + /* Ensure AT91_SHDW_CR is in the TLB by reading it */ + " ldr r6, [%2, #" __stringify(AT91_SHDW_CR) "]\n\t" + + /* Power down SDRAM0 */ + " str %1, [%0, #" __stringify(AT91_DDRSDRC_LPR) "]\n\t" + /* Shutdown CPU */ + " str %3, [%2, #" __stringify(AT91_SHDW_CR) "]\n\t" + + " b .\n\t" + : + : "r" (mpddrc_base), + "r" cpu_to_le32(AT91_DDRSDRC_LPDDR2_PWOFF), + "r" (at91_shdwc_base), + "r" cpu_to_le32(AT91_SHDW_KEY | AT91_SHDW_SHDW) + : "r0"); +} + static int at91_poweroff_get_wakeup_mode(struct device_node *np) { const char *pm; @@ -124,6 +151,8 @@ static void at91_poweroff_dt_set_wakeup_mode(struct platform_device *pdev) static int __init at91_poweroff_probe(struct platform_device *pdev) { struct resource *res; + struct device_node *np; + u32 ddr_type; int ret; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -150,12 +179,30 @@ static int __init at91_poweroff_probe(struct platform_device *pdev) pm_power_off = at91_poweroff; + np = of_find_compatible_node(NULL, NULL, "atmel,sama5d3-ddramc"); + if (!np) + return 0; + + mpddrc_base = of_iomap(np, 0); + of_node_put(np); + + if (!mpddrc_base) + return 0; + + ddr_type = readl(mpddrc_base + AT91_DDRSDRC_MDR) & AT91_DDRSDRC_MD; + if ((ddr_type == AT91_DDRSDRC_MD_LPDDR2) || + (ddr_type == AT91_DDRSDRC_MD_LPDDR3)) + pm_power_off = at91_lpddr_poweroff; + else + iounmap(mpddrc_base); + return 0; } static int __exit at91_poweroff_remove(struct platform_device *pdev) { - if (pm_power_off == at91_poweroff) + if (pm_power_off == at91_poweroff || + pm_power_off == at91_lpddr_poweroff) pm_power_off = NULL; clk_disable_unprepare(sclk); @@ -163,6 +210,11 @@ static int __exit at91_poweroff_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id at91_ramc_of_match[] = { + { .compatible = "atmel,sama5d3-ddramc", }, + { /* sentinel */ } +}; + static const struct of_device_id at91_poweroff_of_match[] = { { .compatible = "atmel,at91sam9260-shdwc", }, { .compatible = "atmel,at91sam9rl-shdwc", }, diff --git a/drivers/power/reset/at91-sama5d2_shdwc.c b/drivers/power/reset/at91-sama5d2_shdwc.c index 8a5ac9706c9c..90b0b5a70ce5 100644 --- a/drivers/power/reset/at91-sama5d2_shdwc.c +++ b/drivers/power/reset/at91-sama5d2_shdwc.c @@ -22,9 +22,12 @@ #include #include #include +#include #include #include +#include + #define SLOW_CLOCK_FREQ 32768 #define AT91_SHDW_CR 0x00 /* Shut Down Control Register */ @@ -75,6 +78,7 @@ struct shdwc { */ static struct shdwc *at91_shdwc; static struct clk *sclk; +static void __iomem *mpddrc_base; static const unsigned long long sdwc_dbc_period[] = { 0, 3, 32, 512, 4096, 32768, @@ -108,6 +112,29 @@ static void at91_poweroff(void) at91_shdwc->at91_shdwc_base + AT91_SHDW_CR); } +static void at91_lpddr_poweroff(void) +{ + asm volatile( + /* Align to cache lines */ + ".balign 32\n\t" + + /* Ensure AT91_SHDW_CR is in the TLB by reading it */ + " ldr r6, [%2, #" __stringify(AT91_SHDW_CR) "]\n\t" + + /* Power down SDRAM0 */ + " str %1, [%0, #" __stringify(AT91_DDRSDRC_LPR) "]\n\t" + /* Shutdown CPU */ + " str %3, [%2, #" __stringify(AT91_SHDW_CR) "]\n\t" + + " b .\n\t" + : + : "r" (mpddrc_base), + "r" cpu_to_le32(AT91_DDRSDRC_LPDDR2_PWOFF), + "r" (at91_shdwc->at91_shdwc_base), + "r" cpu_to_le32(AT91_SHDW_KEY | AT91_SHDW_SHDW) + : "r0"); +} + static u32 at91_shdwc_debouncer_value(struct platform_device *pdev, u32 in_period_us) { @@ -212,6 +239,8 @@ static int __init at91_shdwc_probe(struct platform_device *pdev) { struct resource *res; const struct of_device_id *match; + struct device_node *np; + u32 ddr_type; int ret; if (!pdev->dev.of_node) @@ -249,6 +278,23 @@ static int __init at91_shdwc_probe(struct platform_device *pdev) pm_power_off = at91_poweroff; + np = of_find_compatible_node(NULL, NULL, "atmel,sama5d3-ddramc"); + if (!np) + return 0; + + mpddrc_base = of_iomap(np, 0); + of_node_put(np); + + if (!mpddrc_base) + return 0; + + ddr_type = readl(mpddrc_base + AT91_DDRSDRC_MDR) & AT91_DDRSDRC_MD; + if ((ddr_type == AT91_DDRSDRC_MD_LPDDR2) || + (ddr_type == AT91_DDRSDRC_MD_LPDDR3)) + pm_power_off = at91_lpddr_poweroff; + else + iounmap(mpddrc_base); + return 0; } @@ -256,7 +302,8 @@ static int __exit at91_shdwc_remove(struct platform_device *pdev) { struct shdwc *shdw = platform_get_drvdata(pdev); - if (pm_power_off == at91_poweroff) + if (pm_power_off == at91_poweroff || + pm_power_off == at91_lpddr_poweroff) pm_power_off = NULL; /* Reset values to disable wake-up features */ From 3de5a92847c1d87d86ee945e6002f2051e02e868 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 3 Nov 2016 10:29:28 -0600 Subject: [PATCH 064/357] Fix: Disable sys_membarrier when nohz_full is enabled commit 907565337ebf998a68cb5c5b2174ce5e5da065eb upstream. Userspace applications should be allowed to expect the membarrier system call with MEMBARRIER_CMD_SHARED command to issue memory barriers on nohz_full CPUs, but synchronize_sched() does not take those into account. Given that we do not want unrelated processes to be able to affect real-time sensitive nohz_full CPUs, simply return ENOSYS when membarrier is invoked on a kernel with enabled nohz_full CPUs. Signed-off-by: Mathieu Desnoyers CC: Josh Triplett CC: Steven Rostedt Signed-off-by: Paul E. McKenney Cc: Frederic Weisbecker Cc: Chris Metcalf Cc: Rik van Riel Acked-by: Lai Jiangshan Reviewed-by: Josh Triplett Signed-off-by: Greg Kroah-Hartman --- kernel/membarrier.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/membarrier.c b/kernel/membarrier.c index 536c727a56e9..9f9284f37f8d 100644 --- a/kernel/membarrier.c +++ b/kernel/membarrier.c @@ -16,6 +16,7 @@ #include #include +#include /* * Bitmask made from a "or" of all commands within enum membarrier_cmd, @@ -51,6 +52,9 @@ */ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags) { + /* MEMBARRIER_CMD_SHARED is not compatible with nohz_full. */ + if (tick_nohz_full_enabled()) + return -ENOSYS; if (unlikely(flags)) return -EINVAL; switch (cmd) { From a9b0c14ba1e4be94036c5725c144818a2d3be191 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 4 Feb 2017 23:14:19 -0500 Subject: [PATCH 065/357] jbd2: don't leak modified metadata buffers on an aborted journal commit e112666b4959b25a8552d63bc564e1059be703e8 upstream. If the journal has been aborted, we shouldn't mark the underlying buffer head as dirty, since that will cause the metadata block to get modified. And if the journal has been aborted, we shouldn't allow this since it will almost certainly lead to a corrupted file system. Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/transaction.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index e1652665bd93..5e659ee08d6a 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1863,7 +1863,9 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) __blist_del_buffer(list, jh); jh->b_jlist = BJ_None; - if (test_clear_buffer_jbddirty(bh)) + if (transaction && is_journal_aborted(transaction->t_journal)) + clear_buffer_jbddirty(bh); + else if (test_clear_buffer_jbddirty(bh)) mark_buffer_dirty(bh); /* Expose it to the VM */ } From 50447afd96145b6d3d7aea412da178dce95f1f9b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 11 Feb 2017 11:40:45 +0800 Subject: [PATCH 066/357] block/loop: fix race between I/O and set_status commit ecdd09597a57251323b0de50e3d45e69298c4a83 upstream. Inside set_status, transfer need to setup again, so we have to drain IO before the transition, otherwise oops may be triggered like the following: divide error: 0000 [#1] SMP KASAN CPU: 0 PID: 2935 Comm: loop7 Not tainted 4.10.0-rc7+ #213 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff88006ba1e840 task.stack: ffff880067338000 RIP: 0010:transfer_xor+0x1d1/0x440 drivers/block/loop.c:110 RSP: 0018:ffff88006733f108 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8800688d7000 RCX: 0000000000000059 RDX: 0000000000000000 RSI: 1ffff1000d743f43 RDI: ffff880068891c08 RBP: ffff88006733f160 R08: ffff8800688d7001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800688d7000 R13: ffff880067b7d000 R14: dffffc0000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88006d000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000006c17e0 CR3: 0000000066e3b000 CR4: 00000000001406f0 Call Trace: lo_do_transfer drivers/block/loop.c:251 [inline] lo_read_transfer drivers/block/loop.c:392 [inline] do_req_filebacked drivers/block/loop.c:541 [inline] loop_handle_cmd drivers/block/loop.c:1677 [inline] loop_queue_work+0xda0/0x49b0 drivers/block/loop.c:1689 kthread_worker_fn+0x4c3/0xa30 kernel/kthread.c:630 kthread+0x326/0x3f0 kernel/kthread.c:227 ret_from_fork+0x31/0x40 arch/x86/entry/entry_64.S:430 Code: 03 83 e2 07 41 29 df 42 0f b6 04 30 4d 8d 44 24 01 38 d0 7f 08 84 c0 0f 85 62 02 00 00 44 89 f8 41 0f b6 48 ff 25 ff 01 00 00 99 7d c8 48 63 d2 48 03 55 d0 48 89 d0 48 89 d7 48 c1 e8 03 83 RIP: transfer_xor+0x1d1/0x440 drivers/block/loop.c:110 RSP: ffff88006733f108 ---[ end trace 0166f7bd3b0c0933 ]--- Reported-by: Dmitry Vyukov Signed-off-by: Ming Lei Tested-by: Dmitry Vyukov Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 4af818766797..ccc3acc0c5ba 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1097,9 +1097,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) return -EINVAL; + /* I/O need to be drained during transfer transition */ + blk_mq_freeze_queue(lo->lo_queue); + err = loop_release_xfer(lo); if (err) - return err; + goto exit; if (info->lo_encrypt_type) { unsigned int type = info->lo_encrypt_type; @@ -1114,12 +1117,14 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) err = loop_init_xfer(lo, xfer, info); if (err) - return err; + goto exit; if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) - if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) - return -EFBIG; + if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { + err = -EFBIG; + goto exit; + } loop_config_discard(lo); @@ -1156,7 +1161,9 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) /* update dio if lo_offset or transfer is changed */ __loop_update_dio(lo, lo->use_dio); - return 0; + exit: + blk_mq_unfreeze_queue(lo->lo_queue); + return err; } static int From 8ca25e39ec2d67df1f69f08c1d4ad0d6310a5c5b Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 1 Mar 2017 10:42:38 -0800 Subject: [PATCH 067/357] loop: fix LO_FLAGS_PARTSCAN hang commit e02898b423802b1f3a3aaa7f16e896da069ba8f7 upstream. loop_reread_partitions() needs to do I/O, but we just froze the queue, so we end up waiting forever. This can easily be reproduced with losetup -P. Fix it by moving the reread to after we unfreeze the queue. Fixes: ecdd09597a57 ("block/loop: fix race between I/O and set_status") Reported-by: Tejun Heo Signed-off-by: Omar Sandoval Reviewed-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ccc3acc0c5ba..24d6cefceb32 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1142,13 +1142,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) (info->lo_flags & LO_FLAGS_AUTOCLEAR)) lo->lo_flags ^= LO_FLAGS_AUTOCLEAR; - if ((info->lo_flags & LO_FLAGS_PARTSCAN) && - !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { - lo->lo_flags |= LO_FLAGS_PARTSCAN; - lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; - loop_reread_partitions(lo, lo->lo_device); - } - lo->lo_encrypt_key_size = info->lo_encrypt_key_size; lo->lo_init[0] = info->lo_init[0]; lo->lo_init[1] = info->lo_init[1]; @@ -1163,6 +1156,14 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) exit: blk_mq_unfreeze_queue(lo->lo_queue); + + if (!err && (info->lo_flags & LO_FLAGS_PARTSCAN) && + !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { + lo->lo_flags |= LO_FLAGS_PARTSCAN; + lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; + loop_reread_partitions(lo, lo->lo_device); + } + return err; } From 72ae476d0401c38c912ad740cadcc7ef302f5ef2 Mon Sep 17 00:00:00 2001 From: Roman Pen Date: Sun, 8 Jan 2017 20:59:35 -0500 Subject: [PATCH 068/357] ext4: Include forgotten start block on fallocate insert range commit 2a9b8cba62c0741109c33a2be700ff3d7703a7c2 upstream. While doing 'insert range' start block should be also shifted right. The bug can be easily reproduced by the following test: ptr = malloc(4096); assert(ptr); fd = open("./ext4.file", O_CREAT | O_TRUNC | O_RDWR, 0600); assert(fd >= 0); rc = fallocate(fd, 0, 0, 8192); assert(rc == 0); for (i = 0; i < 2048; i++) *((unsigned short *)ptr + i) = 0xbeef; rc = pwrite(fd, ptr, 4096, 0); assert(rc == 4096); rc = pwrite(fd, ptr, 4096, 4096); assert(rc == 4096); for (block = 2; block < 1000; block++) { rc = fallocate(fd, FALLOC_FL_INSERT_RANGE, 4096, 4096); assert(rc == 0); for (i = 0; i < 2048; i++) *((unsigned short *)ptr + i) = block; rc = pwrite(fd, ptr, 4096, 4096); assert(rc == 4096); } Because start block is not included in the range the hole appears at the wrong offset (just after the desired offset) and the following pwrite() overwrites already existent block, keeping hole untouched. Simple way to verify wrong behaviour is to check zeroed blocks after the test: $ hexdump ./ext4.file | grep '0000 0000' The root cause of the bug is a wrong range (start, stop], where start should be inclusive, i.e. [start, stop]. This patch fixes the problem by including start into the range. But not to break left shift (range collapse) stop points to the beginning of the a block, not to the end. The other not obvious change is an iterator check on validness in a main loop. Because iterator is unsigned the following corner case should be considered with care: insert a block at 0 offset, when stop variables overflows and never becomes less than start, which is 0. To handle this special case iterator is set to NULL to indicate that end of the loop is reached. Fixes: 331573febb6a2 Signed-off-by: Roman Pen Signed-off-by: Theodore Ts'o Cc: Namjae Jeon Cc: Andreas Dilger Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c930a0110fb4..b4987ea2ca79 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5353,8 +5353,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, if (!extent) goto out; - stop = le32_to_cpu(extent->ee_block) + - ext4_ext_get_actual_len(extent); + stop = le32_to_cpu(extent->ee_block); /* * In case of left shift, Don't start shifting extents until we make @@ -5393,8 +5392,12 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, else iterator = &stop; - /* Its safe to start updating extents */ - while (start < stop) { + /* + * Its safe to start updating extents. Start and stop are unsigned, so + * in case of right shift if extent with 0 block is reached, iterator + * becomes NULL to indicate the end of the loop. + */ + while (iterator && start <= stop) { path = ext4_find_extent(inode, *iterator, &path, 0); if (IS_ERR(path)) return PTR_ERR(path); @@ -5422,8 +5425,11 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, ext4_ext_get_actual_len(extent); } else { extent = EXT_FIRST_EXTENT(path[depth].p_hdr); - *iterator = le32_to_cpu(extent->ee_block) > 0 ? - le32_to_cpu(extent->ee_block) - 1 : 0; + if (le32_to_cpu(extent->ee_block) > 0) + *iterator = le32_to_cpu(extent->ee_block) - 1; + else + /* Beginning is reached, end of the loop */ + iterator = NULL; /* Update path extent in case we need to stop */ while (le32_to_cpu(extent->ee_block) < start) extent++; From e0b53d67291334125531f7f21b2f3df3d6ccc6ec Mon Sep 17 00:00:00 2001 From: Roman Pen Date: Sun, 8 Jan 2017 21:00:35 -0500 Subject: [PATCH 069/357] ext4: do not polute the extents cache while shifting extents commit 03e916fa8b5577d85471452a3d0c5738aa658dae upstream. Inside ext4_ext_shift_extents() function ext4_find_extent() is called without EXT4_EX_NOCACHE flag, which should prevent cache population. This leads to oudated offsets in the extents tree and wrong blocks afterwards. Patch fixes the problem providing EXT4_EX_NOCACHE flag for each ext4_find_extents() call inside ext4_ext_shift_extents function. Fixes: 331573febb6a2 Signed-off-by: Roman Pen Signed-off-by: Theodore Ts'o Cc: Namjae Jeon Cc: Andreas Dilger Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index b4987ea2ca79..9fbf92ca358c 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5344,7 +5344,8 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, ext4_lblk_t stop, *iterator, ex_start, ex_end; /* Let path point to the last extent */ - path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); + path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, + EXT4_EX_NOCACHE); if (IS_ERR(path)) return PTR_ERR(path); @@ -5360,7 +5361,8 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, * sure the hole is big enough to accommodate the shift. */ if (SHIFT == SHIFT_LEFT) { - path = ext4_find_extent(inode, start - 1, &path, 0); + path = ext4_find_extent(inode, start - 1, &path, + EXT4_EX_NOCACHE); if (IS_ERR(path)) return PTR_ERR(path); depth = path->p_depth; @@ -5398,7 +5400,8 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, * becomes NULL to indicate the end of the loop. */ while (iterator && start <= stop) { - path = ext4_find_extent(inode, *iterator, &path, 0); + path = ext4_find_extent(inode, *iterator, &path, + EXT4_EX_NOCACHE); if (IS_ERR(path)) return PTR_ERR(path); depth = path->p_depth; From fc6c2da174edd7a7b760b12c60d432d300e05cca Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 27 Jan 2017 14:34:30 -0500 Subject: [PATCH 070/357] ext4: trim allocation requests to group size commit cd648b8a8fd5071d232242d5ee7ee3c0815776af upstream. If filesystem groups are artifically small (using parameter -g to mkfs.ext4), ext4_mb_normalize_request() can result in a request that is larger than a block group. Trim the request size to not confuse allocation code. Reported-by: "Kirill A. Shutemov" Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 7ae43c59bc79..2e9fc7a61048 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3123,6 +3123,13 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, if (ar->pright && start + size - 1 >= ar->lright) size -= start + size - ar->lright; + /* + * Trim allocation request for filesystems with artificially small + * groups. + */ + if (size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)) + size = EXT4_BLOCKS_PER_GROUP(ac->ac_sb); + end = start + size; /* check we don't cross already preallocated blocks */ From a5a9cf387de6dde4c79b6e7fb6311dbd7ec86f02 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 27 Jan 2017 14:35:38 -0500 Subject: [PATCH 071/357] ext4: fix data corruption in data=journal mode commit 3b136499e906460919f0d21a49db1aaccf0ae963 upstream. ext4_journalled_write_end() did not propely handle all the cases when generic_perform_write() did not copy all the data into the target page and could mark buffers with uninitialized contents as uptodate and dirty leading to possible data corruption (which would be quickly fixed by generic_perform_write() retrying the write but still). Fix the problem by carefully handling the case when the page that is written to is not uptodate. Reported-by: Al Viro Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 33a509c876ee..a48b17cc0cd1 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1379,7 +1379,9 @@ errout: * set the buffer to be dirty, since in data=journalled mode we need * to call ext4_handle_dirty_metadata() instead. */ -static void zero_new_buffers(struct page *page, unsigned from, unsigned to) +static void ext4_journalled_zero_new_buffers(handle_t *handle, + struct page *page, + unsigned from, unsigned to) { unsigned int block_start = 0, block_end; struct buffer_head *head, *bh; @@ -1396,7 +1398,7 @@ static void zero_new_buffers(struct page *page, unsigned from, unsigned to) size = min(to, block_end) - start; zero_user(page, start, size); - set_buffer_uptodate(bh); + write_end_fn(handle, bh); } clear_buffer_new(bh); } @@ -1428,15 +1430,16 @@ static int ext4_journalled_write_end(struct file *file, if (ext4_has_inline_data(inode)) copied = ext4_write_inline_data_end(inode, pos, len, copied, page); - else { - if (copied < len) { - if (!PageUptodate(page)) - copied = 0; - zero_new_buffers(page, from+copied, to); - } - + else if (unlikely(copied < len) && !PageUptodate(page)) { + copied = 0; + ext4_journalled_zero_new_buffers(handle, page, from, to); + } else { + if (unlikely(copied < len)) + ext4_journalled_zero_new_buffers(handle, page, + from + copied, to); ret = ext4_walk_page_buffers(handle, page_buffers(page), from, - to, &partial, write_end_fn); + from + copied, &partial, + write_end_fn); if (!partial) SetPageUptodate(page); } From 68ca0fdac41fa801e1e6c2f99e27a9c24e494532 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 1 Feb 2017 21:07:11 -0500 Subject: [PATCH 072/357] ext4: fix use-after-iput when fscrypt contexts are inconsistent commit dd01b690f8f4b1e414f89e5a9a5326bf720d6652 upstream. In the case where the child's encryption context was inconsistent with its parent directory, we were using inode->i_sb and inode->i_ino after the inode had already been iput(). Fix this by doing the iput() in the correct places. Note: only ext4 had this bug, not f2fs and ubifs. Fixes: d9cdc9033181 ("ext4 crypto: enforce context consistency") Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/namei.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 104f8bfba718..c4a389a6027b 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1616,13 +1616,15 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi !fscrypt_has_permitted_context(dir, inode)) { int nokey = ext4_encrypted_inode(inode) && !fscrypt_has_encryption_key(inode); - iput(inode); - if (nokey) + if (nokey) { + iput(inode); return ERR_PTR(-ENOKEY); + } ext4_warning(inode->i_sb, "Inconsistent encryption contexts: %lu/%lu", (unsigned long) dir->i_ino, (unsigned long) inode->i_ino); + iput(inode); return ERR_PTR(-EPERM); } } From 0b37d0c0c6b35317bfc3aee8cdf94f853d594e5f Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 4 Feb 2017 23:04:00 -0500 Subject: [PATCH 073/357] ext4: fix inline data error paths commit eb5efbcb762aee4b454b04f7115f73ccbcf8f0ef upstream. The write_end() function must always unlock the page and drop its ref count, even on an error. Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inline.c | 9 ++++++++- fs/ext4/inode.c | 20 +++++++++++++++----- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 028329721bbe..37b521ed39df 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -933,8 +933,15 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, struct page *page) { int i_size_changed = 0; + int ret; - copied = ext4_write_inline_data_end(inode, pos, len, copied, page); + ret = ext4_write_inline_data_end(inode, pos, len, copied, page); + if (ret < 0) { + unlock_page(page); + put_page(page); + return ret; + } + copied = ret; /* * No need to use i_size_read() here, the i_size diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a48b17cc0cd1..1d4f5faa04b5 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1324,8 +1324,11 @@ static int ext4_write_end(struct file *file, if (ext4_has_inline_data(inode)) { ret = ext4_write_inline_data_end(inode, pos, len, copied, page); - if (ret < 0) + if (ret < 0) { + unlock_page(page); + put_page(page); goto errout; + } copied = ret; } else copied = block_write_end(file, mapping, pos, @@ -1427,10 +1430,16 @@ static int ext4_journalled_write_end(struct file *file, BUG_ON(!ext4_handle_valid(handle)); - if (ext4_has_inline_data(inode)) - copied = ext4_write_inline_data_end(inode, pos, len, - copied, page); - else if (unlikely(copied < len) && !PageUptodate(page)) { + if (ext4_has_inline_data(inode)) { + ret = ext4_write_inline_data_end(inode, pos, len, + copied, page); + if (ret < 0) { + unlock_page(page); + put_page(page); + goto errout; + } + copied = ret; + } else if (unlikely(copied < len) && !PageUptodate(page)) { copied = 0; ext4_journalled_zero_new_buffers(handle, page, from, to); } else { @@ -1465,6 +1474,7 @@ static int ext4_journalled_write_end(struct file *file, */ ext4_orphan_add(handle, inode); +errout: ret2 = ext4_journal_stop(handle); if (!ret) ret = ret2; From 269bf7b8c5db7544133dcff03bd741f980e1f110 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 4 Feb 2017 23:38:06 -0500 Subject: [PATCH 074/357] ext4: preserve the needs_recovery flag when the journal is aborted commit 97abd7d4b5d9c48ec15c425485f054e1c15e591b upstream. If the journal is aborted, the needs_recovery feature flag should not be removed. Otherwise, it's the journal might not get replayed and this could lead to more data getting lost. Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index bbc316db9495..322119c27348 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -825,6 +825,7 @@ static void ext4_put_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; + int aborted = 0; int i, err; ext4_unregister_li_request(sb); @@ -834,9 +835,10 @@ static void ext4_put_super(struct super_block *sb) destroy_workqueue(sbi->rsv_conversion_wq); if (sbi->s_journal) { + aborted = is_journal_aborted(sbi->s_journal); err = jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; - if (err < 0) + if ((err < 0) && !aborted) ext4_abort(sb, "Couldn't clean up the journal"); } @@ -847,7 +849,7 @@ static void ext4_put_super(struct super_block *sb) ext4_mb_release(sb); ext4_ext_release(sb); - if (!(sb->s_flags & MS_RDONLY)) { + if (!(sb->s_flags & MS_RDONLY) && !aborted) { ext4_clear_feature_journal_needs_recovery(sb); es->s_state = cpu_to_le16(sbi->s_mount_state); } From d6dcec965bc53eb375e50642b3b1abb9b835c2a7 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 5 Feb 2017 01:26:48 -0500 Subject: [PATCH 075/357] ext4: return EROFS if device is r/o and journal replay is needed commit 4753d8a24d4588657bc0a4cd66d4e282dff15c8c upstream. If the file system requires journal recovery, and the device is read-ony, return EROFS to the mount system call. This allows xfstests generic/050 to pass. Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 322119c27348..afe29ba42a4e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3913,7 +3913,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * root first: it may be modified in the journal! */ if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) { - if (ext4_load_journal(sb, es, journal_devnum)) + err = ext4_load_journal(sb, es, journal_devnum); + if (err) goto failed_mount3a; } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && ext4_has_feature_journal_needs_recovery(sb)) { From d6407e10bcf540a67afbeca83e60a4323cba8b33 Mon Sep 17 00:00:00 2001 From: Mathias Svensson Date: Fri, 6 Jan 2017 13:32:39 -0800 Subject: [PATCH 076/357] samples/seccomp: fix 64-bit comparison macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 916cafdc95843fb9af5fd5f83ca499d75473d107 upstream. There were some bugs in the JNE64 and JLT64 comparision macros. This fixes them, improves comments, and cleans up the file while we are at it. Reported-by: Stephen Röttger Signed-off-by: Mathias Svensson Signed-off-by: Kees Cook Signed-off-by: James Morris Signed-off-by: Greg Kroah-Hartman --- samples/seccomp/bpf-helper.h | 139 ++++++++++++++++++++--------------- 1 file changed, 79 insertions(+), 60 deletions(-) diff --git a/samples/seccomp/bpf-helper.h b/samples/seccomp/bpf-helper.h index 38ee70f3cd5b..1d8de9edd858 100644 --- a/samples/seccomp/bpf-helper.h +++ b/samples/seccomp/bpf-helper.h @@ -138,7 +138,7 @@ union arg64 { #define ARG_32(idx) \ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, LO_ARG(idx)) -/* Loads hi into A and lo in X */ +/* Loads lo into M[0] and hi into M[1] and A */ #define ARG_64(idx) \ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, LO_ARG(idx)), \ BPF_STMT(BPF_ST, 0), /* lo -> M[0] */ \ @@ -153,62 +153,14 @@ union arg64 { BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (value), 1, 0), \ jt -/* Checks the lo, then swaps to check the hi. A=lo,X=hi */ -#define JEQ64(lo, hi, jt) \ - BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \ - BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ - BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (lo), 0, 2), \ - BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ - jt, \ - BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ - -#define JNE64(lo, hi, jt) \ - BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 5, 0), \ - BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ - BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (lo), 2, 0), \ - BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ - jt, \ - BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ - #define JA32(value, jt) \ BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, (value), 0, 1), \ jt -#define JA64(lo, hi, jt) \ - BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, (hi), 3, 0), \ - BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ - BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, (lo), 0, 2), \ - BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ - jt, \ - BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ - #define JGE32(value, jt) \ BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (value), 0, 1), \ jt -#define JLT32(value, jt) \ - BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (value), 1, 0), \ - jt - -/* Shortcut checking if hi > arg.hi. */ -#define JGE64(lo, hi, jt) \ - BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (hi), 4, 0), \ - BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \ - BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ - BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (lo), 0, 2), \ - BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ - jt, \ - BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ - -#define JLT64(lo, hi, jt) \ - BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (hi), 0, 4), \ - BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \ - BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ - BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (lo), 2, 0), \ - BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ - jt, \ - BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ - #define JGT32(value, jt) \ BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (value), 0, 1), \ jt @@ -217,24 +169,91 @@ union arg64 { BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (value), 1, 0), \ jt -/* Check hi > args.hi first, then do the GE checking */ -#define JGT64(lo, hi, jt) \ - BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (hi), 4, 0), \ +#define JLT32(value, jt) \ + BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (value), 1, 0), \ + jt + +/* + * All the JXX64 checks assume lo is saved in M[0] and hi is saved in both + * A and M[1]. This invariant is kept by restoring A if necessary. + */ +#define JEQ64(lo, hi, jt) \ + /* if (hi != arg.hi) goto NOMATCH; */ \ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \ BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ - BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (lo), 0, 2), \ - BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ + /* if (lo != arg.lo) goto NOMATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (lo), 0, 2), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), \ jt, \ - BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ + BPF_STMT(BPF_LD+BPF_MEM, 1) + +#define JNE64(lo, hi, jt) \ + /* if (hi != arg.hi) goto MATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 3), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), \ + /* if (lo != arg.lo) goto MATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (lo), 2, 0), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), \ + jt, \ + BPF_STMT(BPF_LD+BPF_MEM, 1) + +#define JA64(lo, hi, jt) \ + /* if (hi & arg.hi) goto MATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, (hi), 3, 0), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), \ + /* if (lo & arg.lo) goto MATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, (lo), 0, 2), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), \ + jt, \ + BPF_STMT(BPF_LD+BPF_MEM, 1) + +#define JGE64(lo, hi, jt) \ + /* if (hi > arg.hi) goto MATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (hi), 4, 0), \ + /* if (hi != arg.hi) goto NOMATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), \ + /* if (lo >= arg.lo) goto MATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (lo), 0, 2), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), \ + jt, \ + BPF_STMT(BPF_LD+BPF_MEM, 1) + +#define JGT64(lo, hi, jt) \ + /* if (hi > arg.hi) goto MATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (hi), 4, 0), \ + /* if (hi != arg.hi) goto NOMATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), \ + /* if (lo > arg.lo) goto MATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (lo), 0, 2), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), \ + jt, \ + BPF_STMT(BPF_LD+BPF_MEM, 1) #define JLE64(lo, hi, jt) \ - BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (hi), 6, 0), \ - BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 3), \ - BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ + /* if (hi < arg.hi) goto MATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (hi), 0, 4), \ + /* if (hi != arg.hi) goto NOMATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), \ + /* if (lo <= arg.lo) goto MATCH; */ \ BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (lo), 2, 0), \ - BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ + BPF_STMT(BPF_LD+BPF_MEM, 1), \ jt, \ - BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ + BPF_STMT(BPF_LD+BPF_MEM, 1) + +#define JLT64(lo, hi, jt) \ + /* if (hi < arg.hi) goto MATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (hi), 0, 4), \ + /* if (hi != arg.hi) goto NOMATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), \ + /* if (lo < arg.lo) goto MATCH; */ \ + BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (lo), 2, 0), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), \ + jt, \ + BPF_STMT(BPF_LD+BPF_MEM, 1) #define LOAD_SYSCALL_NR \ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ From d2a8cd3eee0fe9b14fe3a72c02cffa7df31a2383 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Wed, 8 Feb 2017 00:41:45 +0200 Subject: [PATCH 077/357] mei: remove support for broken parallel read commit cb97fbbcac15982406e0c74cd5512a8b6fcf10b3 upstream. Parallel reads from multiple threads on a file descriptor are not well defined and racy. It is safer to return to original behavior and simply fail the additional read. The solution is to remove request for next read credit. Fixes: ff1586a7ea57 ("mei: enqueue consecutive reads") Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/main.c | 48 ++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c index fa50635512e8..41f318631c6d 100644 --- a/drivers/misc/mei/main.c +++ b/drivers/misc/mei/main.c @@ -182,32 +182,36 @@ static ssize_t mei_read(struct file *file, char __user *ubuf, goto out; } - if (rets == -EBUSY && - !mei_cl_enqueue_ctrl_wr_cb(cl, length, MEI_FOP_READ, file)) { - rets = -ENOMEM; + +again: + mutex_unlock(&dev->device_lock); + if (wait_event_interruptible(cl->rx_wait, + !list_empty(&cl->rd_completed) || + !mei_cl_is_connected(cl))) { + if (signal_pending(current)) + return -EINTR; + return -ERESTARTSYS; + } + mutex_lock(&dev->device_lock); + + if (!mei_cl_is_connected(cl)) { + rets = -ENODEV; goto out; } - do { - mutex_unlock(&dev->device_lock); + cb = mei_cl_read_cb(cl, file); + if (!cb) { + /* + * For amthif all the waiters are woken up, + * but only fp with matching cb->fp get the cb, + * the others have to return to wait on read. + */ + if (cl == &dev->iamthif_cl) + goto again; - if (wait_event_interruptible(cl->rx_wait, - (!list_empty(&cl->rd_completed)) || - (!mei_cl_is_connected(cl)))) { - - if (signal_pending(current)) - return -EINTR; - return -ERESTARTSYS; - } - - mutex_lock(&dev->device_lock); - if (!mei_cl_is_connected(cl)) { - rets = -ENODEV; - goto out; - } - - cb = mei_cl_read_cb(cl, file); - } while (!cb); + rets = 0; + goto out; + } copy_buffer: /* now copy the data to user space */ From bac7f7135d51144ff193002b296b8bdf06c9855f Mon Sep 17 00:00:00 2001 From: Tamizh chelvam Date: Thu, 2 Feb 2017 08:32:18 +0200 Subject: [PATCH 078/357] ath10k: fix boot failure in UTF mode/testmode commit cb4281528b62207918b1e95827cad7527aa4dbaa upstream. Rx filter reset and the dynamic tx switch mode (EXT_RESOURCE_CFG) configuration are causing the following errors when UTF firmware is loaded to the target. Error message 1: [ 598.015629] ath10k_pci 0001:01:00.0: failed to ping firmware: -110 [ 598.020828] ath10k_pci 0001:01:00.0: failed to reset rx filter: -110 [ 598.141556] ath10k_pci 0001:01:00.0: failed to start core (testmode): -110 Error message 2: [ 668.615839] ath10k_ahb a000000.wifi: failed to send ext resource cfg command : -95 [ 668.618902] ath10k_ahb a000000.wifi: failed to start core (testmode): -95 Avoiding these configurations while bringing the target in testmode is solving the problem. Signed-off-by: Tamizh chelvam Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/core.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 0c4532227f25..972b5e224d5d 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -1901,7 +1901,8 @@ int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode, ath10k_dbg(ar, ATH10K_DBG_BOOT, "firmware %s booted\n", ar->hw->wiphy->fw_version); - if (test_bit(WMI_SERVICE_EXT_RES_CFG_SUPPORT, ar->wmi.svc_map)) { + if (test_bit(WMI_SERVICE_EXT_RES_CFG_SUPPORT, ar->wmi.svc_map) && + mode == ATH10K_FIRMWARE_MODE_NORMAL) { val = 0; if (ath10k_peer_stats_enabled(ar)) val = WMI_10_4_PEER_STATS; @@ -1954,10 +1955,13 @@ int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode, * possible to implicitly make it correct by creating a dummy vdev and * then deleting it. */ - status = ath10k_core_reset_rx_filter(ar); - if (status) { - ath10k_err(ar, "failed to reset rx filter: %d\n", status); - goto err_hif_stop; + if (mode == ATH10K_FIRMWARE_MODE_NORMAL) { + status = ath10k_core_reset_rx_filter(ar); + if (status) { + ath10k_err(ar, + "failed to reset rx filter: %d\n", status); + goto err_hif_stop; + } } /* If firmware indicates Full Rx Reorder support it must be used in a From 93c1f1db1a7421545e9aae9fb6091668ca570f2b Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 11 Jan 2017 16:32:13 +0200 Subject: [PATCH 079/357] ath5k: drop bogus warning on drv_set_key with unsupported cipher commit a70e1d6fd6b5e1a81fa6171600942bee34f5128f upstream. Simply return -EOPNOTSUPP instead. Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath5k/mac80211-ops.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath5k/mac80211-ops.c b/drivers/net/wireless/ath/ath5k/mac80211-ops.c index dc44cfef7517..16e052d02c94 100644 --- a/drivers/net/wireless/ath/ath5k/mac80211-ops.c +++ b/drivers/net/wireless/ath/ath5k/mac80211-ops.c @@ -502,8 +502,7 @@ ath5k_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, break; return -EOPNOTSUPP; default: - WARN_ON(1); - return -EINVAL; + return -EOPNOTSUPP; } mutex_lock(&ah->lock); From c41cae06bf6027d2f77ebbb12bc17da1cd6e131f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 2 Feb 2017 10:14:52 +0100 Subject: [PATCH 080/357] ath9k: fix race condition in enabling/disabling IRQs commit 3a5e969bb2f6692a256352649355d56d018d6b88 upstream. The code currently relies on refcounting to disable IRQs from within the IRQ handler and re-enabling them again after the tasklet has run. However, due to race conditions sometimes the IRQ handler might be called twice, or the tasklet may not run at all (if interrupted in the middle of a reset). This can cause nasty imbalances in the irq-disable refcount which will get the driver permanently stuck until the entire radio has been stopped and started again (ath_reset will not recover from this). Instead of using this fragile logic, change the code to ensure that running the irq handler during tasklet processing is safe, and leave the refcount untouched. Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/ath9k.h | 1 + drivers/net/wireless/ath/ath9k/init.c | 1 + drivers/net/wireless/ath/ath9k/mac.c | 44 ++++++++++++++++++++------ drivers/net/wireless/ath/ath9k/mac.h | 1 + drivers/net/wireless/ath/ath9k/main.c | 27 +++++++--------- 5 files changed, 48 insertions(+), 26 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h index 26fc8ecfe8c4..a7316710a902 100644 --- a/drivers/net/wireless/ath/ath9k/ath9k.h +++ b/drivers/net/wireless/ath/ath9k/ath9k.h @@ -959,6 +959,7 @@ struct ath_softc { struct survey_info *cur_survey; struct survey_info survey[ATH9K_NUM_CHANNELS]; + spinlock_t intr_lock; struct tasklet_struct intr_tq; struct tasklet_struct bcon_tasklet; struct ath_hw *sc_ah; diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index cfa3fe82ade3..297d4bbc5c05 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -626,6 +626,7 @@ static int ath9k_init_softc(u16 devid, struct ath_softc *sc, common->bt_ant_diversity = 1; spin_lock_init(&common->cc_lock); + spin_lock_init(&sc->intr_lock); spin_lock_init(&sc->sc_serial_rw); spin_lock_init(&sc->sc_pm_lock); spin_lock_init(&sc->chan_lock); diff --git a/drivers/net/wireless/ath/ath9k/mac.c b/drivers/net/wireless/ath/ath9k/mac.c index bba85d1a6cd1..d937c39b3a0b 100644 --- a/drivers/net/wireless/ath/ath9k/mac.c +++ b/drivers/net/wireless/ath/ath9k/mac.c @@ -805,21 +805,12 @@ void ath9k_hw_disable_interrupts(struct ath_hw *ah) } EXPORT_SYMBOL(ath9k_hw_disable_interrupts); -void ath9k_hw_enable_interrupts(struct ath_hw *ah) +static void __ath9k_hw_enable_interrupts(struct ath_hw *ah) { struct ath_common *common = ath9k_hw_common(ah); u32 sync_default = AR_INTR_SYNC_DEFAULT; u32 async_mask; - if (!(ah->imask & ATH9K_INT_GLOBAL)) - return; - - if (!atomic_inc_and_test(&ah->intr_ref_cnt)) { - ath_dbg(common, INTERRUPT, "Do not enable IER ref count %d\n", - atomic_read(&ah->intr_ref_cnt)); - return; - } - if (AR_SREV_9340(ah) || AR_SREV_9550(ah) || AR_SREV_9531(ah) || AR_SREV_9561(ah)) sync_default &= ~AR_INTR_SYNC_HOST1_FATAL; @@ -841,6 +832,39 @@ void ath9k_hw_enable_interrupts(struct ath_hw *ah) ath_dbg(common, INTERRUPT, "AR_IMR 0x%x IER 0x%x\n", REG_READ(ah, AR_IMR), REG_READ(ah, AR_IER)); } + +void ath9k_hw_resume_interrupts(struct ath_hw *ah) +{ + struct ath_common *common = ath9k_hw_common(ah); + + if (!(ah->imask & ATH9K_INT_GLOBAL)) + return; + + if (atomic_read(&ah->intr_ref_cnt) != 0) { + ath_dbg(common, INTERRUPT, "Do not enable IER ref count %d\n", + atomic_read(&ah->intr_ref_cnt)); + return; + } + + __ath9k_hw_enable_interrupts(ah); +} +EXPORT_SYMBOL(ath9k_hw_resume_interrupts); + +void ath9k_hw_enable_interrupts(struct ath_hw *ah) +{ + struct ath_common *common = ath9k_hw_common(ah); + + if (!(ah->imask & ATH9K_INT_GLOBAL)) + return; + + if (!atomic_inc_and_test(&ah->intr_ref_cnt)) { + ath_dbg(common, INTERRUPT, "Do not enable IER ref count %d\n", + atomic_read(&ah->intr_ref_cnt)); + return; + } + + __ath9k_hw_enable_interrupts(ah); +} EXPORT_SYMBOL(ath9k_hw_enable_interrupts); void ath9k_hw_set_interrupts(struct ath_hw *ah) diff --git a/drivers/net/wireless/ath/ath9k/mac.h b/drivers/net/wireless/ath/ath9k/mac.h index 3bab01435a86..770fc11b41d1 100644 --- a/drivers/net/wireless/ath/ath9k/mac.h +++ b/drivers/net/wireless/ath/ath9k/mac.h @@ -744,6 +744,7 @@ void ath9k_hw_set_interrupts(struct ath_hw *ah); void ath9k_hw_enable_interrupts(struct ath_hw *ah); void ath9k_hw_disable_interrupts(struct ath_hw *ah); void ath9k_hw_kill_interrupts(struct ath_hw *ah); +void ath9k_hw_resume_interrupts(struct ath_hw *ah); void ar9002_hw_attach_mac_ops(struct ath_hw *ah); diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index e9f32b52fc8c..b868f02ced89 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -373,21 +373,20 @@ void ath9k_tasklet(unsigned long data) struct ath_common *common = ath9k_hw_common(ah); enum ath_reset_type type; unsigned long flags; - u32 status = sc->intrstatus; + u32 status; u32 rxmask; + spin_lock_irqsave(&sc->intr_lock, flags); + status = sc->intrstatus; + sc->intrstatus = 0; + spin_unlock_irqrestore(&sc->intr_lock, flags); + ath9k_ps_wakeup(sc); spin_lock(&sc->sc_pcu_lock); if (status & ATH9K_INT_FATAL) { type = RESET_TYPE_FATAL_INT; ath9k_queue_reset(sc, type); - - /* - * Increment the ref. counter here so that - * interrupts are enabled in the reset routine. - */ - atomic_inc(&ah->intr_ref_cnt); ath_dbg(common, RESET, "FATAL: Skipping interrupts\n"); goto out; } @@ -403,11 +402,6 @@ void ath9k_tasklet(unsigned long data) type = RESET_TYPE_BB_WATCHDOG; ath9k_queue_reset(sc, type); - /* - * Increment the ref. counter here so that - * interrupts are enabled in the reset routine. - */ - atomic_inc(&ah->intr_ref_cnt); ath_dbg(common, RESET, "BB_WATCHDOG: Skipping interrupts\n"); goto out; @@ -420,7 +414,6 @@ void ath9k_tasklet(unsigned long data) if ((sc->gtt_cnt >= MAX_GTT_CNT) && !ath9k_hw_check_alive(ah)) { type = RESET_TYPE_TX_GTT; ath9k_queue_reset(sc, type); - atomic_inc(&ah->intr_ref_cnt); ath_dbg(common, RESET, "GTT: Skipping interrupts\n"); goto out; @@ -477,7 +470,7 @@ void ath9k_tasklet(unsigned long data) ath9k_btcoex_handle_interrupt(sc, status); /* re-enable hardware interrupt */ - ath9k_hw_enable_interrupts(ah); + ath9k_hw_resume_interrupts(ah); out: spin_unlock(&sc->sc_pcu_lock); ath9k_ps_restore(sc); @@ -541,7 +534,9 @@ irqreturn_t ath_isr(int irq, void *dev) return IRQ_NONE; /* Cache the status */ - sc->intrstatus = status; + spin_lock(&sc->intr_lock); + sc->intrstatus |= status; + spin_unlock(&sc->intr_lock); if (status & SCHED_INTR) sched = true; @@ -587,7 +582,7 @@ chip_reset: if (sched) { /* turn off every interrupt */ - ath9k_hw_disable_interrupts(ah); + ath9k_hw_kill_interrupts(ah); tasklet_schedule(&sc->intr_tq); } From 787fd7a6c8ede68e55321c5bfbea9f1ca89a8148 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Tue, 14 Feb 2017 20:10:30 +0100 Subject: [PATCH 081/357] ath9k: use correct OTP register offsets for the AR9340 and AR9550 commit c9f1e32600816d695f817477d56490bfc2ba43c6 upstream. This patch fixes the OTP register definitions for the AR934x and AR9550 WMAC SoC. Previously, the ath9k driver was unable to initialize the integrated WMAC on an Aerohive AP121: | ath: phy0: timeout (1000 us) on reg 0x30018: 0xbadc0ffe & 0x00000007 != 0x00000004 | ath: phy0: timeout (1000 us) on reg 0x30018: 0xbadc0ffe & 0x00000007 != 0x00000004 | ath: phy0: Unable to initialize hardware; initialization status: -5 | ath9k ar934x_wmac: failed to initialize device | ath9k: probe of ar934x_wmac failed with error -5 It turns out that the AR9300_OTP_STATUS and AR9300_OTP_DATA definitions contain a typo. Cc: Gabor Juhos Fixes: add295a4afbdf5852d0 "ath9k: use correct OTP register offsets for AR9550" Signed-off-by: Christian Lamparter Signed-off-by: Chris Blake Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/ar9003_eeprom.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.h b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.h index 107bcfbbe0fb..cb37bf01920e 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.h +++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.h @@ -73,13 +73,13 @@ #define AR9300_OTP_BASE \ ((AR_SREV_9340(ah) || AR_SREV_9550(ah)) ? 0x30000 : 0x14000) #define AR9300_OTP_STATUS \ - ((AR_SREV_9340(ah) || AR_SREV_9550(ah)) ? 0x30018 : 0x15f18) + ((AR_SREV_9340(ah) || AR_SREV_9550(ah)) ? 0x31018 : 0x15f18) #define AR9300_OTP_STATUS_TYPE 0x7 #define AR9300_OTP_STATUS_VALID 0x4 #define AR9300_OTP_STATUS_ACCESS_BUSY 0x2 #define AR9300_OTP_STATUS_SM_BUSY 0x1 #define AR9300_OTP_READ_DATA \ - ((AR_SREV_9340(ah) || AR_SREV_9550(ah)) ? 0x3001c : 0x15f1c) + ((AR_SREV_9340(ah) || AR_SREV_9550(ah)) ? 0x3101c : 0x15f1c) enum targetPowerHTRates { HT_TARGET_RATE_0_8_16, From bc5338a4fd3d9042107ce3fe40792c1c4839ebab Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Fri, 10 Feb 2017 15:18:46 -0600 Subject: [PATCH 082/357] PCI: hv: Fix wslot_to_devfn() to fix warnings on device removal commit 60e2e2fbafdd1285ae1b4ad39ded41603e0c74d0 upstream. The devfn of 00:02.0 is 0x10. devfn_to_wslot(0x10) == 0x2, and wslot_to_devfn(0x2) should be 0x10, while it's 0x2 in the current code. Due to this, hv_eject_device_work() -> pci_get_domain_bus_and_slot() returns NULL and pci_stop_and_remove_bus_device() is not called. Later when the real device driver's .remove() is invoked by hv_pci_remove() -> pci_stop_root_bus(), some warnings can be noticed because the VM has lost the access to the underlying device at that time. Signed-off-by: Jake Oshins Signed-off-by: Dexuan Cui Signed-off-by: Bjorn Helgaas Acked-by: Haiyang Zhang CC: K. Y. Srinivasan CC: Stephen Hemminger Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pci-hyperv.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c index 763ff8745828..61fc349c96d4 100644 --- a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -130,7 +130,8 @@ union pci_version { */ union win_slot_encoding { struct { - u32 func:8; + u32 dev:5; + u32 func:3; u32 reserved:24; } bits; u32 slot; @@ -483,7 +484,8 @@ static u32 devfn_to_wslot(int devfn) union win_slot_encoding wslot; wslot.slot = 0; - wslot.bits.func = PCI_SLOT(devfn) | (PCI_FUNC(devfn) << 5); + wslot.bits.dev = PCI_SLOT(devfn); + wslot.bits.func = PCI_FUNC(devfn); return wslot.slot; } @@ -501,7 +503,7 @@ static int wslot_to_devfn(u32 wslot) union win_slot_encoding slot_no; slot_no.slot = wslot; - return PCI_DEVFN(0, slot_no.bits.func); + return PCI_DEVFN(slot_no.bits.dev, slot_no.bits.func); } /* From 1fb738a3dc1304250c755e5e31715137c1c44c50 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 16 Feb 2017 10:22:34 +1100 Subject: [PATCH 083/357] pci/hotplug/pnv-php: Disable MSI and PCI device properly commit 49f4b08e61547a5ccd2db551d994c4503efe5666 upstream. pnv_php_disable_irq() can be called in two paths: Bailing path in pnv_php_enable_irq() or releasing slot. The MSI (or MSIx) interrupts is disabled unconditionally in pnv_php_disable_irq(). It's wrong because that might be enabled by drivers other than pnv-php. This disables MSI (or MSIx) interrupts and the PCI device only if it was enabled by pnv-php. In the error path of pnv_php_enable_irq(), we rely on the newly added parameter @disable_device. In the path of releasing slot, @pnv_php->irq is checked. Fixes: 360aebd85a4c ("drivers/pci/hotplug: Support surprise hotplug in powernv driver") Signed-off-by: Gavin Shan Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/pci/hotplug/pnv_php.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 56efaf72d08e..acb2be0c8c2c 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -35,9 +35,11 @@ static void pnv_php_register(struct device_node *dn); static void pnv_php_unregister_one(struct device_node *dn); static void pnv_php_unregister(struct device_node *dn); -static void pnv_php_disable_irq(struct pnv_php_slot *php_slot) +static void pnv_php_disable_irq(struct pnv_php_slot *php_slot, + bool disable_device) { struct pci_dev *pdev = php_slot->pdev; + int irq = php_slot->irq; u16 ctrl; if (php_slot->irq > 0) { @@ -56,10 +58,14 @@ static void pnv_php_disable_irq(struct pnv_php_slot *php_slot) php_slot->wq = NULL; } - if (pdev->msix_enabled) - pci_disable_msix(pdev); - else if (pdev->msi_enabled) - pci_disable_msi(pdev); + if (disable_device || irq > 0) { + if (pdev->msix_enabled) + pci_disable_msix(pdev); + else if (pdev->msi_enabled) + pci_disable_msi(pdev); + + pci_disable_device(pdev); + } } static void pnv_php_free_slot(struct kref *kref) @@ -68,7 +74,7 @@ static void pnv_php_free_slot(struct kref *kref) struct pnv_php_slot, kref); WARN_ON(!list_empty(&php_slot->children)); - pnv_php_disable_irq(php_slot); + pnv_php_disable_irq(php_slot, false); kfree(php_slot->name); kfree(php_slot); } @@ -759,7 +765,7 @@ static void pnv_php_init_irq(struct pnv_php_slot *php_slot, int irq) php_slot->wq = alloc_workqueue("pciehp-%s", 0, 0, php_slot->name); if (!php_slot->wq) { dev_warn(&pdev->dev, "Cannot alloc workqueue\n"); - pnv_php_disable_irq(php_slot); + pnv_php_disable_irq(php_slot, true); return; } @@ -772,7 +778,7 @@ static void pnv_php_init_irq(struct pnv_php_slot *php_slot, int irq) ret = request_irq(irq, pnv_php_interrupt, IRQF_SHARED, php_slot->name, php_slot); if (ret) { - pnv_php_disable_irq(php_slot); + pnv_php_disable_irq(php_slot, true); dev_warn(&pdev->dev, "Error %d enabling IRQ %d\n", ret, irq); return; } From 730b1b20c6cdc63f4ce32efc5a494fc9b141b854 Mon Sep 17 00:00:00 2001 From: Ley Foon Tan Date: Tue, 28 Feb 2017 18:37:16 +0800 Subject: [PATCH 084/357] PCI: altera: Fix TLP_CFG_DW0 for TLP write commit 2a7275a3d867b228216886aae35e1f64291180b1 upstream. eb5767122feb ("PCI: altera: Simplify TLB_CFG_DW0 usage") used TLP_FMTTYPE_CFGRD* (instead of TLP_FMTTYPE_CFGWR*) for TLP writes, which causes writing to configuration space to fail. Fix it by using correct FMTTYPE for write operation. Fixes: eb5767122feb ("PCI: altera: Simplify TLB_CFG_DW0 usage") Signed-off-by: Ley Foon Tan Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pcie-altera.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/pci/host/pcie-altera.c b/drivers/pci/host/pcie-altera.c index b0ac4dfafa0b..f2907e7adb5d 100644 --- a/drivers/pci/host/pcie-altera.c +++ b/drivers/pci/host/pcie-altera.c @@ -57,10 +57,14 @@ #define TLP_WRITE_TAG 0x10 #define RP_DEVFN 0 #define TLP_REQ_ID(bus, devfn) (((bus) << 8) | (devfn)) -#define TLP_CFG_DW0(pcie, bus) \ +#define TLP_CFGRD_DW0(pcie, bus) \ ((((bus == pcie->root_bus_nr) ? TLP_FMTTYPE_CFGRD0 \ : TLP_FMTTYPE_CFGRD1) << 24) | \ TLP_PAYLOAD_SIZE) +#define TLP_CFGWR_DW0(pcie, bus) \ + ((((bus == pcie->root_bus_nr) ? TLP_FMTTYPE_CFGWR0 \ + : TLP_FMTTYPE_CFGWR1) << 24) | \ + TLP_PAYLOAD_SIZE) #define TLP_CFG_DW1(pcie, tag, be) \ (((TLP_REQ_ID(pcie->root_bus_nr, RP_DEVFN)) << 16) | (tag << 8) | (be)) #define TLP_CFG_DW2(bus, devfn, offset) \ @@ -222,7 +226,7 @@ static int tlp_cfg_dword_read(struct altera_pcie *pcie, u8 bus, u32 devfn, { u32 headers[TLP_HDR_SIZE]; - headers[0] = TLP_CFG_DW0(pcie, bus); + headers[0] = TLP_CFGRD_DW0(pcie, bus); headers[1] = TLP_CFG_DW1(pcie, TLP_READ_TAG, byte_en); headers[2] = TLP_CFG_DW2(bus, devfn, where); @@ -237,7 +241,7 @@ static int tlp_cfg_dword_write(struct altera_pcie *pcie, u8 bus, u32 devfn, u32 headers[TLP_HDR_SIZE]; int ret; - headers[0] = TLP_CFG_DW0(pcie, bus); + headers[0] = TLP_CFGWR_DW0(pcie, bus); headers[1] = TLP_CFG_DW1(pcie, TLP_WRITE_TAG, byte_en); headers[2] = TLP_CFG_DW2(bus, devfn, where); From 65013a93b6c335bd166c911d73c47ff4bcbdb8be Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 7 Dec 2016 01:16:24 -0800 Subject: [PATCH 085/357] Drivers: hv: vmbus: Raise retry/wait limits in vmbus_post_msg() commit c0bb03924f1a80e7f65900e36c8e6b3dc167c5f8 upstream. DoS protection conditions were altered in WS2016 and now it's easy to get -EAGAIN returned from vmbus_post_msg() (e.g. when we try changing MTU on a netvsc device in a loop). All vmbus_post_msg() callers don't retry the operation and we usually end up with a non-functional device or crash. While host's DoS protection conditions are unknown to me my tests show that it can take up to 10 seconds before the message is sent so doing udelay() is not an option, we really need to sleep. Almost all vmbus_post_msg() callers are ready to sleep but there is one special case: vmbus_initiate_unload() which can be called from interrupt/NMI context and we can't sleep there. I'm also not sure about the lonely vmbus_send_tl_connect_request() which has no in-tree users but its external users are most likely waiting for the host to reply so sleeping there is also appropriate. Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 17 +++++++++-------- drivers/hv/channel_mgmt.c | 10 ++++++---- drivers/hv/connection.c | 17 ++++++++++++----- drivers/hv/hyperv_vmbus.h | 2 +- 4 files changed, 28 insertions(+), 18 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 5fb4c6d9209b..d5b8d9fd50bb 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -181,7 +181,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); ret = vmbus_post_msg(open_msg, - sizeof(struct vmbus_channel_open_channel)); + sizeof(struct vmbus_channel_open_channel), true); if (ret != 0) { err = ret; @@ -233,7 +233,7 @@ int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id, conn_msg.guest_endpoint_id = *shv_guest_servie_id; conn_msg.host_service_id = *shv_host_servie_id; - return vmbus_post_msg(&conn_msg, sizeof(conn_msg)); + return vmbus_post_msg(&conn_msg, sizeof(conn_msg), true); } EXPORT_SYMBOL_GPL(vmbus_send_tl_connect_request); @@ -419,7 +419,7 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); ret = vmbus_post_msg(gpadlmsg, msginfo->msgsize - - sizeof(*msginfo)); + sizeof(*msginfo), true); if (ret != 0) goto cleanup; @@ -433,8 +433,8 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, gpadl_body->gpadl = next_gpadl_handle; ret = vmbus_post_msg(gpadl_body, - submsginfo->msgsize - - sizeof(*submsginfo)); + submsginfo->msgsize - sizeof(*submsginfo), + true); if (ret != 0) goto cleanup; @@ -485,8 +485,8 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle) list_add_tail(&info->msglistentry, &vmbus_connection.chn_msg_list); spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); - ret = vmbus_post_msg(msg, - sizeof(struct vmbus_channel_gpadl_teardown)); + ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_gpadl_teardown), + true); if (ret) goto post_msg_err; @@ -557,7 +557,8 @@ static int vmbus_close_internal(struct vmbus_channel *channel) msg->header.msgtype = CHANNELMSG_CLOSECHANNEL; msg->child_relid = channel->offermsg.child_relid; - ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_close_channel)); + ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_close_channel), + true); if (ret) { pr_err("Close failed: close post msg return is %d\n", ret); diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index caf341842464..f3a8b52acb51 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -321,7 +321,8 @@ static void vmbus_release_relid(u32 relid) memset(&msg, 0, sizeof(struct vmbus_channel_relid_released)); msg.child_relid = relid; msg.header.msgtype = CHANNELMSG_RELID_RELEASED; - vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released)); + vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released), + true); } void hv_event_tasklet_disable(struct vmbus_channel *channel) @@ -728,7 +729,8 @@ void vmbus_initiate_unload(bool crash) init_completion(&vmbus_connection.unload_event); memset(&hdr, 0, sizeof(struct vmbus_channel_message_header)); hdr.msgtype = CHANNELMSG_UNLOAD; - vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header)); + vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header), + !crash); /* * vmbus_initiate_unload() is also called on crash and the crash can be @@ -1116,8 +1118,8 @@ int vmbus_request_offers(void) msg->msgtype = CHANNELMSG_REQUESTOFFERS; - ret = vmbus_post_msg(msg, - sizeof(struct vmbus_channel_message_header)); + ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header), + true); if (ret != 0) { pr_err("Unable to request offers - %d\n", ret); diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 78e6368a4423..840b6db0ea4b 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -110,7 +110,8 @@ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); ret = vmbus_post_msg(msg, - sizeof(struct vmbus_channel_initiate_contact)); + sizeof(struct vmbus_channel_initiate_contact), + true); if (ret != 0) { spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); list_del(&msginfo->msglistentry); @@ -434,7 +435,7 @@ void vmbus_on_event(unsigned long data) /* * vmbus_post_msg - Send a msg on the vmbus's message connection */ -int vmbus_post_msg(void *buffer, size_t buflen) +int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep) { union hv_connection_id conn_id; int ret = 0; @@ -449,7 +450,7 @@ int vmbus_post_msg(void *buffer, size_t buflen) * insufficient resources. Retry the operation a couple of * times before giving up. */ - while (retries < 20) { + while (retries < 100) { ret = hv_post_message(conn_id, 1, buffer, buflen); switch (ret) { @@ -472,8 +473,14 @@ int vmbus_post_msg(void *buffer, size_t buflen) } retries++; - udelay(usec); - if (usec < 2048) + if (can_sleep && usec > 1000) + msleep(usec / 1000); + else if (usec < MAX_UDELAY_MS * 1000) + udelay(usec); + else + mdelay(usec / 1000); + + if (usec < 256000) usec *= 2; } return ret; diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 2b13f2a0a71e..8d7f865c1133 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -683,7 +683,7 @@ void vmbus_free_channels(void); int vmbus_connect(void); void vmbus_disconnect(void); -int vmbus_post_msg(void *buffer, size_t buflen); +int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep); void vmbus_on_event(unsigned long data); void vmbus_on_msg_dpc(unsigned long data); From 11a4d644d6d84a34a393a389d5efd4e036b04a62 Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Thu, 5 Jan 2017 22:23:31 -0800 Subject: [PATCH 086/357] perf callchain: Reference count maps commit aa33b9b9a2ebb00d33c83a5312d4fbf2d5aeba36 upstream. If dso__load_kcore frees all of the existing maps, but one has already been attached to a callchain cursor node, then we can get a SIGSEGV in any function that happens to try to use this invalid cursor. Use the existing map refcount mechanism to forestall cleanup of a map until the cursor iterates past the node. Signed-off-by: Krister Johansen Tested-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Masami Hiramatsu Cc: Namhyung Kim Fixes: 84c2cafa2889 ("perf tools: Reference count struct map") Link: http://lkml.kernel.org/r/20170106062331.GB2707@templeofstupid.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/callchain.c | 11 +++++++++-- tools/perf/util/callchain.h | 6 ++++++ tools/perf/util/hist.c | 7 +++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index ae58b493af45..ecf6236f3b5f 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -437,7 +437,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) } call->ip = cursor_node->ip; call->ms.sym = cursor_node->sym; - call->ms.map = cursor_node->map; + call->ms.map = map__get(cursor_node->map); list_add_tail(&call->list, &node->val); callchain_cursor_advance(cursor); @@ -462,6 +462,7 @@ add_child(struct callchain_node *parent, list_for_each_entry_safe(call, tmp, &new->val, list) { list_del(&call->list); + map__zput(call->ms.map); free(call); } free(new); @@ -730,6 +731,7 @@ merge_chain_branch(struct callchain_cursor *cursor, callchain_cursor_append(cursor, list->ip, list->ms.map, list->ms.sym); list_del(&list->list); + map__zput(list->ms.map); free(list); } @@ -778,7 +780,8 @@ int callchain_cursor_append(struct callchain_cursor *cursor, } node->ip = ip; - node->map = map; + map__zput(node->map); + node->map = map__get(map); node->sym = sym; cursor->nr++; @@ -945,11 +948,13 @@ static void free_callchain_node(struct callchain_node *node) list_for_each_entry_safe(list, tmp, &node->parent_val, list) { list_del(&list->list); + map__zput(list->ms.map); free(list); } list_for_each_entry_safe(list, tmp, &node->val, list) { list_del(&list->list); + map__zput(list->ms.map); free(list); } @@ -1013,6 +1018,7 @@ int callchain_node__make_parent_list(struct callchain_node *node) goto out; *new = *chain; new->has_children = false; + map__get(new->ms.map); list_add_tail(&new->list, &head); } parent = parent->parent; @@ -1033,6 +1039,7 @@ int callchain_node__make_parent_list(struct callchain_node *node) out: list_for_each_entry_safe(chain, new, &head, list) { list_del(&chain->list); + map__zput(chain->ms.map); free(chain); } return -ENOMEM; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 47cfd1080975..b7cbabb3931f 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -5,6 +5,7 @@ #include #include #include "event.h" +#include "map.h" #include "symbol.h" #define HELP_PAD "\t\t\t\t" @@ -174,8 +175,13 @@ int callchain_merge(struct callchain_cursor *cursor, */ static inline void callchain_cursor_reset(struct callchain_cursor *cursor) { + struct callchain_cursor_node *node; + cursor->nr = 0; cursor->last = &cursor->first; + + for (node = cursor->first; node != NULL; node = node->next) + map__zput(node->map); } int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index a69f027368ef..10849a079026 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1,6 +1,7 @@ #include "util.h" #include "build-id.h" #include "hist.h" +#include "map.h" #include "session.h" #include "sort.h" #include "evlist.h" @@ -1019,6 +1020,10 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, int max_stack_depth, void *arg) { int err, err2; + struct map *alm = NULL; + + if (al && al->map) + alm = map__get(al->map); err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent, iter->evsel, al, max_stack_depth); @@ -1058,6 +1063,8 @@ out: if (!err) err = err2; + map__put(alm); + return err; } From abf74467e7fb19c957eb284798fc41cc8059dc41 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 28 Feb 2017 14:07:25 -0800 Subject: [PATCH 087/357] crypto: testmgr - Pad aes_ccm_enc_tv_template vector commit 1c68bb0f62bf8de8bb30123ea840d5168f25abea upstream. Running with KASAN and crypto tests currently gives BUG: KASAN: global-out-of-bounds in __test_aead+0x9d9/0x2200 at addr ffffffff8212fca0 Read of size 16 by task cryptomgr_test/1107 Address belongs to variable 0xffffffff8212fca0 CPU: 0 PID: 1107 Comm: cryptomgr_test Not tainted 4.10.0+ #45 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.1-1.fc24 04/01/2014 Call Trace: dump_stack+0x63/0x8a kasan_report.part.1+0x4a7/0x4e0 ? __test_aead+0x9d9/0x2200 ? crypto_ccm_init_crypt+0x218/0x3c0 [ccm] kasan_report+0x20/0x30 check_memory_region+0x13c/0x1a0 memcpy+0x23/0x50 __test_aead+0x9d9/0x2200 ? kasan_unpoison_shadow+0x35/0x50 ? alg_test_akcipher+0xf0/0xf0 ? crypto_skcipher_init_tfm+0x2e3/0x310 ? crypto_spawn_tfm2+0x37/0x60 ? crypto_ccm_init_tfm+0xa9/0xd0 [ccm] ? crypto_aead_init_tfm+0x7b/0x90 ? crypto_alloc_tfm+0xc4/0x190 test_aead+0x28/0xc0 alg_test_aead+0x54/0xd0 alg_test+0x1eb/0x3d0 ? alg_find_test+0x90/0x90 ? __sched_text_start+0x8/0x8 ? __wake_up_common+0x70/0xb0 cryptomgr_test+0x4d/0x60 kthread+0x173/0x1c0 ? crypto_acomp_scomp_free_ctx+0x60/0x60 ? kthread_create_on_node+0xa0/0xa0 ret_from_fork+0x2c/0x40 Memory state around the buggy address: ffffffff8212fb80: 00 00 00 00 01 fa fa fa fa fa fa fa 00 00 00 00 ffffffff8212fc00: 00 01 fa fa fa fa fa fa 00 00 00 00 01 fa fa fa >ffffffff8212fc80: fa fa fa fa 00 05 fa fa fa fa fa fa 00 00 00 00 ^ ffffffff8212fd00: 01 fa fa fa fa fa fa fa 00 00 00 00 01 fa fa fa ffffffff8212fd80: fa fa fa fa 00 00 00 00 00 05 fa fa fa fa fa fa This always happens on the same IV which is less than 16 bytes. Per Ard, "CCM IVs are 16 bytes, but due to the way they are constructed internally, the final couple of bytes of input IV are dont-cares. Apparently, we do read all 16 bytes, which triggers the KASAN errors." Fix this by padding the IV with null bytes to be at least 16 bytes. Fixes: 0bc5a6c5c79a ("crypto: testmgr - Disable rfc4309 test and convert test vectors") Acked-by: Ard Biesheuvel Signed-off-by: Laura Abbott Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/testmgr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/testmgr.h b/crypto/testmgr.h index e64a4ef9d8ca..9033088ca231 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -22813,7 +22813,7 @@ static struct aead_testvec aes_ccm_enc_tv_template[] = { "\x09\x75\x9a\x9b\x3c\x9b\x27\x39", .klen = 32, .iv = "\x03\xf9\xd9\x4e\x63\xb5\x3d\x9d" - "\x43\xf6\x1e\x50", + "\x43\xf6\x1e\x50\0\0\0\0", .assoc = "\x57\xf5\x6b\x8b\x57\x5c\x3d\x3b" "\x13\x02\x01\x0c\x83\x4c\x96\x35" "\x8e\xd6\x39\xcf\x7d\x14\x9b\x94" From 1e6be9c19c1286cdc3e87a27b06038361a0165ab Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 22 Feb 2017 20:08:25 +0100 Subject: [PATCH 088/357] fuse: add missing FR_FORCE commit 2e38bea99a80eab408adee27f873a188d57b76cb upstream. fuse_file_put() was missing the "force" flag for the RELEASE request when sending synchronously (fuseblk). If this flag is not set, then a sync request may be interrupted before it is dequeued by the userspace filesystem. In this case the OPEN won't be balanced with a RELEASE. Signed-off-by: Miklos Szeredi Fixes: 5a18ec176c93 ("fuse: fix hang of single threaded fuseblk filesystem") Signed-off-by: Greg Kroah-Hartman --- fs/fuse/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 2401c5dabb2a..5ec5870e423a 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -100,6 +100,7 @@ static void fuse_file_put(struct fuse_file *ff, bool sync) iput(req->misc.release.inode); fuse_put_request(ff->fc, req); } else if (sync) { + __set_bit(FR_FORCE, &req->flags); __clear_bit(FR_BACKGROUND, &req->flags); fuse_request_send(ff->fc, req); iput(req->misc.release.inode); From bfb55d4087cfc4346b1ec5a04cbac04e9df10ab4 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 23 Feb 2017 14:26:03 -0800 Subject: [PATCH 089/357] x86/pkeys: Check against max pkey to avoid overflows commit 58ab9a088ddac4efe823471275859d64f735577e upstream. Kirill reported a warning from UBSAN about undefined behavior when using protection keys. He is running on hardware that actually has support for it, which is not widely available. The warning triggers because of very large shifts of integers when doing a pkey_free() of a large, invalid value. This happens because we never check that the pkey "fits" into the mm_pkey_allocation_map(). I do not believe there is any danger here of anything bad happening other than some aliasing issues where somebody could do: pkey_free(35); and the kernel would effectively execute: pkey_free(8); While this might be confusing to an app that was doing something stupid, it has to do something stupid and the effects are limited to the app shooting itself in the foot. Signed-off-by: Dave Hansen Cc: linux-kselftest@vger.kernel.org Cc: shuah@kernel.org Cc: kirill.shutemov@linux.intel.com Link: http://lkml.kernel.org/r/20170223222603.A022ED65@viggo.jf.intel.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pkeys.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index 34684adb6899..b3b09b98896d 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -46,6 +46,15 @@ extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) { + /* + * "Allocated" pkeys are those that have been returned + * from pkey_alloc(). pkey 0 is special, and never + * returned from pkey_alloc(). + */ + if (pkey <= 0) + return false; + if (pkey >= arch_max_pkey()) + return false; return mm_pkey_allocation_map(mm) & (1U << pkey); } @@ -82,12 +91,6 @@ int mm_pkey_alloc(struct mm_struct *mm) static inline int mm_pkey_free(struct mm_struct *mm, int pkey) { - /* - * pkey 0 is special, always allocated and can never - * be freed. - */ - if (!pkey) - return -EINVAL; if (!mm_pkey_is_allocated(mm, pkey)) return -EINVAL; From ac4c8fcf5ebceb0a2b6342707e4835a13f5bcf22 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 25 Jan 2017 12:29:59 +0000 Subject: [PATCH 090/357] arm/arm64: KVM: Enforce unconditional flush to PoC when mapping to stage-2 commit 8f36ebaf21fdae99c091c67e8b6fab33969f2667 upstream. When we fault in a page, we flush it to the PoC (Point of Coherency) if the faulting vcpu has its own caches off, so that it can observe the page we just brought it. But if the vcpu has its caches on, we skip that step. Bad things happen when *another* vcpu tries to access that page with its own caches disabled. At that point, there is no garantee that the data has made it to the PoC, and we access stale data. The obvious fix is to always flush to PoC when a page is faulted in, no matter what the state of the vcpu is. Fixes: 2d58b733c876 ("arm64: KVM: force cache clean on page fault when caches are off") Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/kvm_mmu.h | 9 +-------- arch/arm64/include/asm/kvm_mmu.h | 3 +-- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 74a44727f8e1..a58bbaa3ec60 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -150,18 +150,12 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, * and iterate over the range. */ - bool need_flush = !vcpu_has_cache_enabled(vcpu) || ipa_uncached; - VM_BUG_ON(size & ~PAGE_MASK); - if (!need_flush && !icache_is_pipt()) - goto vipt_cache; - while (size) { void *va = kmap_atomic_pfn(pfn); - if (need_flush) - kvm_flush_dcache_to_poc(va, PAGE_SIZE); + kvm_flush_dcache_to_poc(va, PAGE_SIZE); if (icache_is_pipt()) __cpuc_coherent_user_range((unsigned long)va, @@ -173,7 +167,6 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, kunmap_atomic(va); } -vipt_cache: if (!icache_is_pipt() && !icache_is_vivt_asid_tagged()) { /* any kind of VIPT cache */ __flush_icache_all(); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 6f72fe8b0e3e..6d22017ebbad 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -241,8 +241,7 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, { void *va = page_address(pfn_to_page(pfn)); - if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached) - kvm_flush_dcache_to_poc(va, size); + kvm_flush_dcache_to_poc(va, size); if (!icache_is_aliasing()) { /* PIPT */ flush_icache_range((unsigned long)va, From a8123045c94638114a0b1ab19fd218dd3a5cd0b5 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 25 Jan 2017 18:31:31 +0000 Subject: [PATCH 091/357] arm64: dma-mapping: Fix dma_mapping_error() when bypassing SWIOTLB commit adbe7e26f4257f72817495b9bce114284060b0d7 upstream. When bypassing SWIOTLB on small-memory systems, we need to avoid calling into swiotlb_dma_mapping_error() in exactly the same way as we avoid swiotlb_dma_supported(), because the former also relies on SWIOTLB state being initialised. Under the assumptions for which we skip SWIOTLB, dma_map_{single,page}() will only ever return the DMA-offset-adjusted physical address of the page passed in, thus we can report success unconditionally. Fixes: b67a8b29df7e ("arm64: mm: only initialize swiotlb when necessary") CC: Jisheng Zhang Reported-by: Aaro Koskinen Signed-off-by: Robin Murphy Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/dma-mapping.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 02265a589ef5..b5bf46ce873b 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -352,6 +352,13 @@ static int __swiotlb_dma_supported(struct device *hwdev, u64 mask) return 1; } +static int __swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t addr) +{ + if (swiotlb) + return swiotlb_dma_mapping_error(hwdev, addr); + return 0; +} + static struct dma_map_ops swiotlb_dma_ops = { .alloc = __dma_alloc, .free = __dma_free, @@ -366,7 +373,7 @@ static struct dma_map_ops swiotlb_dma_ops = { .sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu, .sync_sg_for_device = __swiotlb_sync_sg_for_device, .dma_supported = __swiotlb_dma_supported, - .mapping_error = swiotlb_dma_mapping_error, + .mapping_error = __swiotlb_dma_mapping_error, }; static int __init atomic_pool_init(void) From 7127d43e1843400658e42c0b21dc598a59d603e4 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 2 Feb 2017 17:32:14 +0000 Subject: [PATCH 092/357] arm64: fix erroneous __raw_read_system_reg() cases commit 7d0928f18bf890d2853281f59aba0dd5a46b34f9 upstream. Since it was introduced in commit da8d02d19ffdd201 ("arm64/capabilities: Make use of system wide safe value"), __raw_read_system_reg() has erroneously mapped some sysreg IDs to other registers. For the fields in ID_ISAR5_EL1, our local feature detection will be erroneous. We may spuriously detect that a feature is uniformly supported, or may fail to detect when it actually is, meaning some compat hwcaps may be erroneous (or not enforced upon hotplug). This patch corrects the erroneous entries. Signed-off-by: Mark Rutland Fixes: da8d02d19ffdd201 ("arm64/capabilities: Make use of system wide safe value") Reported-by: Catalin Marinas Cc: Suzuki K Poulose Cc: Will Deacon Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpufeature.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c02504ea304b..3a129d48674e 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -653,15 +653,15 @@ static u64 __raw_read_system_reg(u32 sys_id) case SYS_ID_ISAR2_EL1: return read_cpuid(ID_ISAR2_EL1); case SYS_ID_ISAR3_EL1: return read_cpuid(ID_ISAR3_EL1); case SYS_ID_ISAR4_EL1: return read_cpuid(ID_ISAR4_EL1); - case SYS_ID_ISAR5_EL1: return read_cpuid(ID_ISAR4_EL1); + case SYS_ID_ISAR5_EL1: return read_cpuid(ID_ISAR5_EL1); case SYS_MVFR0_EL1: return read_cpuid(MVFR0_EL1); case SYS_MVFR1_EL1: return read_cpuid(MVFR1_EL1); case SYS_MVFR2_EL1: return read_cpuid(MVFR2_EL1); case SYS_ID_AA64PFR0_EL1: return read_cpuid(ID_AA64PFR0_EL1); - case SYS_ID_AA64PFR1_EL1: return read_cpuid(ID_AA64PFR0_EL1); + case SYS_ID_AA64PFR1_EL1: return read_cpuid(ID_AA64PFR1_EL1); case SYS_ID_AA64DFR0_EL1: return read_cpuid(ID_AA64DFR0_EL1); - case SYS_ID_AA64DFR1_EL1: return read_cpuid(ID_AA64DFR0_EL1); + case SYS_ID_AA64DFR1_EL1: return read_cpuid(ID_AA64DFR1_EL1); case SYS_ID_AA64MMFR0_EL1: return read_cpuid(ID_AA64MMFR0_EL1); case SYS_ID_AA64MMFR1_EL1: return read_cpuid(ID_AA64MMFR1_EL1); case SYS_ID_AA64MMFR2_EL1: return read_cpuid(ID_AA64MMFR2_EL1); From d408d23addbaa9af9e0db492f774d3245ecfeefd Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Thu, 2 Feb 2017 20:30:03 -0600 Subject: [PATCH 093/357] KVM: arm/arm64: vgic: Stop injecting the MSI occurrence twice commit 0bdbf3b071986ba80731203683cf623d5c0cacb1 upstream. The IRQFD framework calls the architecture dependent function twice if the corresponding GSI type is edge triggered. For ARM, the function kvm_set_msi() is getting called twice whenever the IRQFD receives the event signal. The rest of the code path is trying to inject the MSI without any validation checks. No need to call the function vgic_its_inject_msi() second time to avoid an unnecessary overhead in IRQ queue logic. It also avoids the possibility of VM seeing the MSI twice. Simple fix, return -1 if the argument 'level' value is zero. Reviewed-by: Eric Auger Reviewed-by: Christoffer Dall Signed-off-by: Shanker Donthineni Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/vgic/vgic-irqfd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c index d918dcf26a5a..f138ed2e9c63 100644 --- a/virt/kvm/arm/vgic/vgic-irqfd.c +++ b/virt/kvm/arm/vgic/vgic-irqfd.c @@ -99,6 +99,9 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, if (!vgic_has_its(kvm)) return -ENODEV; + if (!level) + return -1; + return vgic_its_inject_msi(kvm, &msi); } From 602bd10f6e947308ffa7f9c2d11d07b9997aaee5 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 1 Feb 2017 21:40:57 +0100 Subject: [PATCH 094/357] iio: pressure: mpl115: do not rely on structure field ordering commit 6a6e1d56a0769795a36c0461c64bf5e5b9bbb4c0 upstream. Fixes a regression triggered by a change in the layout of struct iio_chan_spec, but the real bug is in the driver which assumed a specific structure layout in the first place. Hint: the three bits were not OR:ed together as implied by the indentation prior to this patch, there was a comma between the first two, which accidentally moved the ..._SCALE and ..._OFFSET bits to the next structure field. That field was .info_mask_shared_by_type before the _available attributes was added by commit 51239600074b ("iio:core: add a callback to allow drivers to provide _available attributes") and .info_mask_separate_available afterwards, and the regression happened. info_mask_shared_by_type is actually a better choice than the originally intended info_mask_separate for the ..._SCALE and ..._OFFSET bits since a constant is returned from mpl115_read_raw for the scale/offset. Using info_mask_shared_by_type also preserves the behavior from before the regression and is therefore less likely to cause other interesting side effects. The above mentioned regression causes unintended sysfs attibutes to show up that are not backed by code, in turn causing a NULL pointer defererence to happen on access. Fixes: 3017d90e8931 ("iio: Add Freescale MPL115A2 pressure / temperature sensor driver") Fixes: 51239600074b ("iio:core: add a callback to allow drivers to provide _available attributes") Signed-off-by: Peter Rosin Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/pressure/mpl115.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/pressure/mpl115.c b/drivers/iio/pressure/mpl115.c index 73f2f0c46e62..8f2bce213248 100644 --- a/drivers/iio/pressure/mpl115.c +++ b/drivers/iio/pressure/mpl115.c @@ -137,6 +137,7 @@ static const struct iio_chan_spec mpl115_channels[] = { { .type = IIO_TEMP, .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_OFFSET) | BIT(IIO_CHAN_INFO_SCALE), }, }; From a34546c6234e77dc98f5ac40ad64ee5429c986e6 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 1 Feb 2017 21:40:56 +0100 Subject: [PATCH 095/357] iio: pressure: mpl3115: do not rely on structure field ordering commit 9cf6cdba586ced75c69b8314b88b2d2f5ce9b3ed upstream. Fixes a regression triggered by a change in the layout of struct iio_chan_spec, but the real bug is in the driver which assumed a specific structure layout in the first place. Hint: the two bits were not OR:ed together as implied by the indentation prior to this patch, there was a comma between them, which accidentally moved the ..._SCALE bit to the next structure field. That field was .info_mask_shared_by_type before the _available attributes was added by commit 51239600074b ("iio:core: add a callback to allow drivers to provide _available attributes") and .info_mask_separate_available afterwards, and the regression happened. info_mask_shared_by_type is actually a better choice than the originally intended info_mask_separate for the ..._SCALE bit since a constant is returned from mpl3115_read_raw for the scale. Using info_mask_shared_by_type also preserves the behavior from before the regression and is therefore less likely to cause other interesting side effects. The above mentioned regression causes an unintended sysfs attibute to show up that is not backed by code, in turn causing the following NULL pointer defererence to happen on access. Segmentation fault Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = ecc3c000 [00000000] *pgd=87f91831 Internal error: Oops: 80000007 [#1] SMP ARM Modules linked in: CPU: 1 PID: 1051 Comm: cat Not tainted 4.10.0-rc5-00009-gffd8858-dirty #3 Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) task: ed54ec00 task.stack: ee2bc000 PC is at 0x0 LR is at iio_read_channel_info_avail+0x40/0x280 pc : [<00000000>] lr : [] psr: a0070013 sp : ee2bdda8 ip : 00000000 fp : ee2bddf4 r10: c0a53c74 r9 : ed79f000 r8 : ee8d1018 r7 : 00001000 r6 : 00000fff r5 : ee8b9a00 r4 : ed79f000 r3 : ee2bddc4 r2 : ee2bddbc r1 : c0a86dcc r0 : ee8d1000 Flags: NzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5387d Table: 3cc3c04a DAC: 00000051 Process cat (pid: 1051, stack limit = 0xee2bc210) Stack: (0xee2bdda8 to 0xee2be000) dda0: ee2bddc0 00000002 c016d720 c016d394 ed54ec00 00000000 ddc0: 60070013 ed413780 00000001 edffd480 ee8b9a00 00000fff 00001000 ee8d1018 dde0: ed79f000 c0a53c74 ee2bde0c ee2bddf8 c0513c58 c06fbbe8 edffd480 edffd540 de00: ee2bde3c ee2bde10 c0293474 c0513c40 c02933e4 ee2bde60 00000001 ed413780 de20: 00000001 ed413780 00000000 edffd480 ee2bde4c ee2bde40 c0291d00 c02933f0 de40: ee2bde9c ee2bde50 c024679c c0291ce0 edffd4b0 b6e37000 00020000 ee2bdf78 de60: 00000000 00000000 ed54ec00 ed013200 00000817 c0a111fc edffd540 ed413780 de80: b6e37000 00020000 00020000 ee2bdf78 ee2bded4 ee2bdea0 c0292890 c0246604 dea0: c0117940 c016ba50 00000025 c0a111fc b6e37000 ed413780 ee2bdf78 00020000 dec0: ee2bc000 b6e37000 ee2bdf44 ee2bded8 c021d158 c0292770 c0117764 b6e36004 dee0: c0f0d7c4 ee2bdfb0 b6f89228 00021008 ee2bdfac ee2bdf00 c0101374 c0117770 df00: 00000000 00000000 ee2bc000 00000000 ee2bdf34 ee2bdf20 c016ba04 c0171080 df20: 00000000 00020000 ed413780 b6e37000 00000000 ee2bdf78 ee2bdf74 ee2bdf48 df40: c021e7a0 c021d130 c023e300 c023e280 ee2bdf74 00000000 00000000 ed413780 df60: ed413780 00020000 ee2bdfa4 ee2bdf78 c021e870 c021e71c 00000000 00000000 df80: 00020000 00020000 b6e37000 00000003 c0108084 00000000 00000000 ee2bdfa8 dfa0: c0107ee0 c021e838 00020000 00020000 00000003 b6e37000 00020000 0001a2b4 dfc0: 00020000 00020000 b6e37000 00000003 7fffe000 00000000 00000000 00020000 dfe0: 00000000 be98eb4c 0000c740 b6f1985c 60070010 00000003 00000000 00000000 Backtrace: [] (iio_read_channel_info_avail) from [] (dev_attr_show+0x24/0x50) r10:c0a53c74 r9:ed79f000 r8:ee8d1018 r7:00001000 r6:00000fff r5:ee8b9a00 r4:edffd480 [] (dev_attr_show) from [] (sysfs_kf_seq_show+0x90/0x110) r5:edffd540 r4:edffd480 [] (sysfs_kf_seq_show) from [] (kernfs_seq_show+0x2c/0x30) r10:edffd480 r9:00000000 r8:ed413780 r7:00000001 r6:ed413780 r5:00000001 r4:ee2bde60 r3:c02933e4 [] (kernfs_seq_show) from [] (seq_read+0x1a4/0x4e0) [] (seq_read) from [] (kernfs_fop_read+0x12c/0x1cc) r10:ee2bdf78 r9:00020000 r8:00020000 r7:b6e37000 r6:ed413780 r5:edffd540 r4:c0a111fc [] (kernfs_fop_read) from [] (__vfs_read+0x34/0x118) r10:b6e37000 r9:ee2bc000 r8:00020000 r7:ee2bdf78 r6:ed413780 r5:b6e37000 r4:c0a111fc [] (__vfs_read) from [] (vfs_read+0x90/0x11c) r8:ee2bdf78 r7:00000000 r6:b6e37000 r5:ed413780 r4:00020000 [] (vfs_read) from [] (SyS_read+0x44/0x90) r8:00020000 r7:ed413780 r6:ed413780 r5:00000000 r4:00000000 [] (SyS_read) from [] (ret_fast_syscall+0x0/0x1c) r10:00000000 r8:c0108084 r7:00000003 r6:b6e37000 r5:00020000 r4:00020000 Code: bad PC value ---[ end trace 9c4938ccd0389004 ]--- Fixes: cc26ad455f57 ("iio: Add Freescale MPL3115A2 pressure / temperature sensor driver") Fixes: 51239600074b ("iio:core: add a callback to allow drivers to provide _available attributes") Reported-by: Ken Lin Tested-by: Ken Lin Signed-off-by: Peter Rosin Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/pressure/mpl3115.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/pressure/mpl3115.c b/drivers/iio/pressure/mpl3115.c index 6392d7b62841..eb87948fc559 100644 --- a/drivers/iio/pressure/mpl3115.c +++ b/drivers/iio/pressure/mpl3115.c @@ -182,7 +182,7 @@ static const struct iio_chan_spec mpl3115_channels[] = { { .type = IIO_PRESSURE, .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), - BIT(IIO_CHAN_INFO_SCALE), + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), .scan_index = 0, .scan_type = { .sign = 'u', @@ -195,7 +195,7 @@ static const struct iio_chan_spec mpl3115_channels[] = { { .type = IIO_TEMP, .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), - BIT(IIO_CHAN_INFO_SCALE), + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), .scan_index = 1, .scan_type = { .sign = 's', From cec7abd27e878e3c83dc9af41ee87a2e9d483ac0 Mon Sep 17 00:00:00 2001 From: Ethan Zonca Date: Fri, 24 Feb 2017 11:27:36 -0500 Subject: [PATCH 096/357] can: gs_usb: Don't use stack memory for USB transfers commit c919a3069c775c1c876bec55e00b2305d5125caa upstream. Fixes: 05ca5270005c can: gs_usb: add ethtool set_phys_id callback to locate physical device The gs_usb driver is performing USB transfers using buffers allocated on the stack. This causes the driver to not function with vmapped stacks. Instead, allocate memory for the transfer buffers. Signed-off-by: Ethan Zonca Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/gs_usb.c | 40 ++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 77e3cc06a30c..a0dabd4038ba 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -908,10 +908,14 @@ static int gs_usb_probe(struct usb_interface *intf, struct gs_usb *dev; int rc = -ENOMEM; unsigned int icount, i; - struct gs_host_config hconf = { - .byte_order = 0x0000beef, - }; - struct gs_device_config dconf; + struct gs_host_config *hconf; + struct gs_device_config *dconf; + + hconf = kmalloc(sizeof(*hconf), GFP_KERNEL); + if (!hconf) + return -ENOMEM; + + hconf->byte_order = 0x0000beef; /* send host config */ rc = usb_control_msg(interface_to_usbdev(intf), @@ -920,16 +924,22 @@ static int gs_usb_probe(struct usb_interface *intf, USB_DIR_OUT|USB_TYPE_VENDOR|USB_RECIP_INTERFACE, 1, intf->altsetting[0].desc.bInterfaceNumber, - &hconf, - sizeof(hconf), + hconf, + sizeof(*hconf), 1000); + kfree(hconf); + if (rc < 0) { dev_err(&intf->dev, "Couldn't send data format (err=%d)\n", rc); return rc; } + dconf = kmalloc(sizeof(*dconf), GFP_KERNEL); + if (!dconf) + return -ENOMEM; + /* read device config */ rc = usb_control_msg(interface_to_usbdev(intf), usb_rcvctrlpipe(interface_to_usbdev(intf), 0), @@ -937,28 +947,33 @@ static int gs_usb_probe(struct usb_interface *intf, USB_DIR_IN|USB_TYPE_VENDOR|USB_RECIP_INTERFACE, 1, intf->altsetting[0].desc.bInterfaceNumber, - &dconf, - sizeof(dconf), + dconf, + sizeof(*dconf), 1000); if (rc < 0) { dev_err(&intf->dev, "Couldn't get device config: (err=%d)\n", rc); + kfree(dconf); return rc; } - icount = dconf.icount + 1; + icount = dconf->icount + 1; dev_info(&intf->dev, "Configuring for %d interfaces\n", icount); if (icount > GS_MAX_INTF) { dev_err(&intf->dev, "Driver cannot handle more that %d CAN interfaces\n", GS_MAX_INTF); + kfree(dconf); return -EINVAL; } dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) + if (!dev) { + kfree(dconf); return -ENOMEM; + } + init_usb_anchor(&dev->rx_submitted); atomic_set(&dev->active_channels, 0); @@ -967,7 +982,7 @@ static int gs_usb_probe(struct usb_interface *intf, dev->udev = interface_to_usbdev(intf); for (i = 0; i < icount; i++) { - dev->canch[i] = gs_make_candev(i, intf, &dconf); + dev->canch[i] = gs_make_candev(i, intf, dconf); if (IS_ERR_OR_NULL(dev->canch[i])) { /* save error code to return later */ rc = PTR_ERR(dev->canch[i]); @@ -978,12 +993,15 @@ static int gs_usb_probe(struct usb_interface *intf, gs_destroy_candev(dev->canch[i]); usb_kill_anchored_urbs(&dev->rx_submitted); + kfree(dconf); kfree(dev); return rc; } dev->canch[i]->parent = dev; } + kfree(dconf); + return 0; } From 7afc0ee6ae73c23d8acc7a58113e269f6c1da2a2 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 2 Mar 2017 12:03:40 +0100 Subject: [PATCH 097/357] can: usb_8dev: Fix memory leak of priv->cmd_msg_buffer commit 7c42631376306fb3f34d51fda546b50a9b6dd6ec upstream. The priv->cmd_msg_buffer is allocated in the probe function, but never kfree()ed. This patch converts the kzalloc() to resource-managed kzalloc. Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/usb_8dev.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index 108a30e15097..d000cb62d6ae 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -951,8 +951,8 @@ static int usb_8dev_probe(struct usb_interface *intf, for (i = 0; i < MAX_TX_URBS; i++) priv->tx_contexts[i].echo_index = MAX_TX_URBS; - priv->cmd_msg_buffer = kzalloc(sizeof(struct usb_8dev_cmd_msg), - GFP_KERNEL); + priv->cmd_msg_buffer = devm_kzalloc(&intf->dev, sizeof(struct usb_8dev_cmd_msg), + GFP_KERNEL); if (!priv->cmd_msg_buffer) goto cleanup_candev; @@ -966,7 +966,7 @@ static int usb_8dev_probe(struct usb_interface *intf, if (err) { netdev_err(netdev, "couldn't register CAN device: %d\n", err); - goto cleanup_cmd_msg_buffer; + goto cleanup_candev; } err = usb_8dev_cmd_version(priv, &version); @@ -987,9 +987,6 @@ static int usb_8dev_probe(struct usb_interface *intf, cleanup_unregister_candev: unregister_netdev(priv->netdev); -cleanup_cmd_msg_buffer: - kfree(priv->cmd_msg_buffer); - cleanup_candev: free_candev(netdev); From 1de86951101be4a3bcee9355a10f327bba491ec5 Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Sat, 21 Jan 2017 23:50:18 +0100 Subject: [PATCH 098/357] w1: don't leak refcount on slave attach failure in w1_attach_slave_device() commit d2ce4ea1a0b0162e5d2d7e7942ab6f5cc2063d5a upstream. Near the beginning of w1_attach_slave_device() we increment a w1 master reference count. Later, when we are going to exit this function without actually attaching a slave device (due to failure of __w1_attach_slave_device()) we need to decrement this reference count back. Signed-off-by: Maciej S. Szmigiero Fixes: 9fcbbac5ded489 ("w1: process w1 netlink commands in w1_process thread") Cc: Evgeniy Polyakov Signed-off-by: Greg Kroah-Hartman --- drivers/w1/w1.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c index e213c678bbfe..ab0931e7a9bb 100644 --- a/drivers/w1/w1.c +++ b/drivers/w1/w1.c @@ -763,6 +763,7 @@ int w1_attach_slave_device(struct w1_master *dev, struct w1_reg_num *rn) dev_err(&dev->dev, "%s: Attaching %s failed.\n", __func__, sl->name); w1_family_put(sl->family); + atomic_dec(&sl->master->refcnt); kfree(sl); return err; } From c259832acf57c51971582c84c89b2941a8a9e1ef Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Wed, 18 Jan 2017 21:31:11 +0100 Subject: [PATCH 099/357] w1: ds2490: USB transfer buffers need to be DMAable commit 61cd1b4cd1e8f7f7642ab64529d9bd52e8374641 upstream. ds2490 driver was doing USB transfers from / to buffers on a stack. This is not permitted and made the driver non-working with vmapped stacks. Since all these transfers are done under the same bus_mutex lock we can simply use shared buffers in a device private structure for two most common of them. While we are at it, let's also fix a comparison between int and size_t in ds9490r_search() which made the driver spin in this function if state register get requests were failing. Signed-off-by: Maciej S. Szmigiero Acked-by: Evgeniy Polyakov Signed-off-by: Greg Kroah-Hartman --- drivers/w1/masters/ds2490.c | 142 +++++++++++++++++++++--------------- 1 file changed, 84 insertions(+), 58 deletions(-) diff --git a/drivers/w1/masters/ds2490.c b/drivers/w1/masters/ds2490.c index 049a884a756f..59d74d1b47a8 100644 --- a/drivers/w1/masters/ds2490.c +++ b/drivers/w1/masters/ds2490.c @@ -153,6 +153,9 @@ struct ds_device */ u16 spu_bit; + u8 st_buf[ST_SIZE]; + u8 byte_buf; + struct w1_bus_master master; }; @@ -174,7 +177,6 @@ struct ds_status u8 data_in_buffer_status; u8 reserved1; u8 reserved2; - }; static struct usb_device_id ds_id_table [] = { @@ -244,28 +246,6 @@ static int ds_send_control(struct ds_device *dev, u16 value, u16 index) return err; } -static int ds_recv_status_nodump(struct ds_device *dev, struct ds_status *st, - unsigned char *buf, int size) -{ - int count, err; - - memset(st, 0, sizeof(*st)); - - count = 0; - err = usb_interrupt_msg(dev->udev, usb_rcvintpipe(dev->udev, - dev->ep[EP_STATUS]), buf, size, &count, 1000); - if (err < 0) { - pr_err("Failed to read 1-wire data from 0x%x: err=%d.\n", - dev->ep[EP_STATUS], err); - return err; - } - - if (count >= sizeof(*st)) - memcpy(st, buf, sizeof(*st)); - - return count; -} - static inline void ds_print_msg(unsigned char *buf, unsigned char *str, int off) { pr_info("%45s: %8x\n", str, buf[off]); @@ -324,6 +304,35 @@ static void ds_dump_status(struct ds_device *dev, unsigned char *buf, int count) } } +static int ds_recv_status(struct ds_device *dev, struct ds_status *st, + bool dump) +{ + int count, err; + + if (st) + memset(st, 0, sizeof(*st)); + + count = 0; + err = usb_interrupt_msg(dev->udev, + usb_rcvintpipe(dev->udev, + dev->ep[EP_STATUS]), + dev->st_buf, sizeof(dev->st_buf), + &count, 1000); + if (err < 0) { + pr_err("Failed to read 1-wire data from 0x%x: err=%d.\n", + dev->ep[EP_STATUS], err); + return err; + } + + if (dump) + ds_dump_status(dev, dev->st_buf, count); + + if (st && count >= sizeof(*st)) + memcpy(st, dev->st_buf, sizeof(*st)); + + return count; +} + static void ds_reset_device(struct ds_device *dev) { ds_send_control_cmd(dev, CTL_RESET_DEVICE, 0); @@ -344,7 +353,6 @@ static void ds_reset_device(struct ds_device *dev) static int ds_recv_data(struct ds_device *dev, unsigned char *buf, int size) { int count, err; - struct ds_status st; /* Careful on size. If size is less than what is available in * the input buffer, the device fails the bulk transfer and @@ -359,14 +367,9 @@ static int ds_recv_data(struct ds_device *dev, unsigned char *buf, int size) err = usb_bulk_msg(dev->udev, usb_rcvbulkpipe(dev->udev, dev->ep[EP_DATA_IN]), buf, size, &count, 1000); if (err < 0) { - u8 buf[ST_SIZE]; - int count; - pr_info("Clearing ep0x%x.\n", dev->ep[EP_DATA_IN]); usb_clear_halt(dev->udev, usb_rcvbulkpipe(dev->udev, dev->ep[EP_DATA_IN])); - - count = ds_recv_status_nodump(dev, &st, buf, sizeof(buf)); - ds_dump_status(dev, buf, count); + ds_recv_status(dev, NULL, true); return err; } @@ -404,7 +407,6 @@ int ds_stop_pulse(struct ds_device *dev, int limit) { struct ds_status st; int count = 0, err = 0; - u8 buf[ST_SIZE]; do { err = ds_send_control(dev, CTL_HALT_EXE_IDLE, 0); @@ -413,7 +415,7 @@ int ds_stop_pulse(struct ds_device *dev, int limit) err = ds_send_control(dev, CTL_RESUME_EXE, 0); if (err) break; - err = ds_recv_status_nodump(dev, &st, buf, sizeof(buf)); + err = ds_recv_status(dev, &st, false); if (err) break; @@ -456,18 +458,17 @@ int ds_detect(struct ds_device *dev, struct ds_status *st) static int ds_wait_status(struct ds_device *dev, struct ds_status *st) { - u8 buf[ST_SIZE]; int err, count = 0; do { st->status = 0; - err = ds_recv_status_nodump(dev, st, buf, sizeof(buf)); + err = ds_recv_status(dev, st, false); #if 0 if (err >= 0) { int i; printk("0x%x: count=%d, status: ", dev->ep[EP_STATUS], err); for (i=0; ist_buf[i]); printk("\n"); } #endif @@ -485,7 +486,7 @@ static int ds_wait_status(struct ds_device *dev, struct ds_status *st) * can do something with it). */ if (err > 16 || count >= 100 || err < 0) - ds_dump_status(dev, buf, err); + ds_dump_status(dev, dev->st_buf, err); /* Extended data isn't an error. Well, a short is, but the dump * would have already told the user that and we can't do anything @@ -608,7 +609,6 @@ static int ds_write_byte(struct ds_device *dev, u8 byte) { int err; struct ds_status st; - u8 rbyte; err = ds_send_control(dev, COMM_BYTE_IO | COMM_IM | dev->spu_bit, byte); if (err) @@ -621,11 +621,11 @@ static int ds_write_byte(struct ds_device *dev, u8 byte) if (err) return err; - err = ds_recv_data(dev, &rbyte, sizeof(rbyte)); + err = ds_recv_data(dev, &dev->byte_buf, 1); if (err < 0) return err; - return !(byte == rbyte); + return !(byte == dev->byte_buf); } static int ds_read_byte(struct ds_device *dev, u8 *byte) @@ -712,7 +712,6 @@ static void ds9490r_search(void *data, struct w1_master *master, int err; u16 value, index; struct ds_status st; - u8 st_buf[ST_SIZE]; int search_limit; int found = 0; int i; @@ -724,7 +723,12 @@ static void ds9490r_search(void *data, struct w1_master *master, /* FIFO 128 bytes, bulk packet size 64, read a multiple of the * packet size. */ - u64 buf[2*64/8]; + const size_t bufsize = 2 * 64; + u64 *buf; + + buf = kmalloc(bufsize, GFP_KERNEL); + if (!buf) + return; mutex_lock(&master->bus_mutex); @@ -745,10 +749,9 @@ static void ds9490r_search(void *data, struct w1_master *master, do { schedule_timeout(jtime); - if (ds_recv_status_nodump(dev, &st, st_buf, sizeof(st_buf)) < - sizeof(st)) { + err = ds_recv_status(dev, &st, false); + if (err < 0 || err < sizeof(st)) break; - } if (st.data_in_buffer_status) { /* Bulk in can receive partial ids, but when it does @@ -758,7 +761,7 @@ static void ds9490r_search(void *data, struct w1_master *master, * bulk without first checking if status says there * is data to read. */ - err = ds_recv_data(dev, (u8 *)buf, sizeof(buf)); + err = ds_recv_data(dev, (u8 *)buf, bufsize); if (err < 0) break; for (i = 0; i < err/8; ++i) { @@ -794,9 +797,14 @@ static void ds9490r_search(void *data, struct w1_master *master, } search_out: mutex_unlock(&master->bus_mutex); + kfree(buf); } #if 0 +/* + * FIXME: if this disabled code is ever used in the future all ds_send_data() + * calls must be changed to use a DMAable buffer. + */ static int ds_match_access(struct ds_device *dev, u64 init) { int err; @@ -845,13 +853,12 @@ static int ds_set_path(struct ds_device *dev, u64 init) static u8 ds9490r_touch_bit(void *data, u8 bit) { - u8 ret; struct ds_device *dev = data; - if (ds_touch_bit(dev, bit, &ret)) + if (ds_touch_bit(dev, bit, &dev->byte_buf)) return 0; - return ret; + return dev->byte_buf; } #if 0 @@ -866,13 +873,12 @@ static u8 ds9490r_read_bit(void *data) { struct ds_device *dev = data; int err; - u8 bit = 0; - err = ds_touch_bit(dev, 1, &bit); + err = ds_touch_bit(dev, 1, &dev->byte_buf); if (err) return 0; - return bit & 1; + return dev->byte_buf & 1; } #endif @@ -887,32 +893,52 @@ static u8 ds9490r_read_byte(void *data) { struct ds_device *dev = data; int err; - u8 byte = 0; - err = ds_read_byte(dev, &byte); + err = ds_read_byte(dev, &dev->byte_buf); if (err) return 0; - return byte; + return dev->byte_buf; } static void ds9490r_write_block(void *data, const u8 *buf, int len) { struct ds_device *dev = data; + u8 *tbuf; - ds_write_block(dev, (u8 *)buf, len); + if (len <= 0) + return; + + tbuf = kmalloc(len, GFP_KERNEL); + if (!tbuf) + return; + + memcpy(tbuf, buf, len); + ds_write_block(dev, tbuf, len); + + kfree(tbuf); } static u8 ds9490r_read_block(void *data, u8 *buf, int len) { struct ds_device *dev = data; int err; + u8 *tbuf; - err = ds_read_block(dev, buf, len); - if (err < 0) + if (len <= 0) return 0; - return len; + tbuf = kmalloc(len, GFP_KERNEL); + if (!tbuf) + return 0; + + err = ds_read_block(dev, tbuf, len); + if (err >= 0) + memcpy(buf, tbuf, len); + + kfree(tbuf); + + return err >= 0 ? len : 0; } static u8 ds9490r_reset(void *data) From 5413f432a1f33293b9183fadb1b44e29b168aa24 Mon Sep 17 00:00:00 2001 From: Alexandre Bailon Date: Wed, 1 Feb 2017 21:30:12 -0600 Subject: [PATCH 100/357] usb: musb: da8xx: Remove CPPI 3.0 quirk and methods commit a994ce2d7e66008381a0b184c73be9ae9b72eb5c upstream. DA8xx driver is registering and using the CPPI 3.0 DMA controller but actually, the DA8xx has a CPPI 4.1 DMA controller. Remove the CPPI 3.0 quirk and methods. Fixes: f8e9f34f80a2 ("usb: musb: Fix up DMA related macros") Fixes: 7f6283ed6fe8 ("usb: musb: Set up function pointers for DMA") Signed-off-by: Alexandre Bailon Acked-by: Sergei Shtylyov Acked-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/da8xx.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c index 2440f88e07a3..bacee0fd4dd3 100644 --- a/drivers/usb/musb/da8xx.c +++ b/drivers/usb/musb/da8xx.c @@ -434,15 +434,11 @@ static int da8xx_musb_exit(struct musb *musb) } static const struct musb_platform_ops da8xx_ops = { - .quirks = MUSB_DMA_CPPI | MUSB_INDEXED_EP, + .quirks = MUSB_INDEXED_EP, .init = da8xx_musb_init, .exit = da8xx_musb_exit, .fifo_mode = 2, -#ifdef CONFIG_USB_TI_CPPI_DMA - .dma_init = cppi_dma_controller_create, - .dma_exit = cppi_dma_controller_destroy, -#endif .enable = da8xx_musb_enable, .disable = da8xx_musb_disable, From 3ccf60e1ff5e6cbd47b860b95f6cb53ebd98113b Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 19 Jan 2017 13:38:42 +0200 Subject: [PATCH 101/357] usb: dwc3: gadget: skip Set/Clear Halt when invalid commit ffb80fc672c3a7b6afd0cefcb1524fb99917b2f3 upstream. At least macOS seems to be sending ClearFeature(ENDPOINT_HALT) to endpoints which aren't Halted. This makes DWC3's CLEARSTALL command time out which causes several issues for the driver. Instead, let's just return 0 and bail out early. Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index d2b860ebfe13..5dc6bfc91f4b 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1234,6 +1234,9 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol) unsigned transfer_in_flight; unsigned started; + if (dep->flags & DWC3_EP_STALL) + return 0; + if (dep->number > 1) trb = dwc3_ep_prev_trb(dep, dep->trb_enqueue); else @@ -1258,6 +1261,8 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol) else dep->flags |= DWC3_EP_STALL; } else { + if (!(dep->flags & DWC3_EP_STALL)) + return 0; ret = dwc3_send_clear_stall_ep_cmd(dep); if (ret) From 82bd560d9552a7b30ad1123591de63684fe0a63e Mon Sep 17 00:00:00 2001 From: William wu Date: Tue, 17 Jan 2017 15:32:07 +0800 Subject: [PATCH 102/357] usb: host: xhci: plat: check hcc_params after add hcd commit 5de4e1ea9a731cad195ce5152705c21daef3bbba upstream. The commit 4ac53087d6d4 ("usb: xhci: plat: Create both HCDs before adding them") move add hcd to the end of probe, this cause hcc_params uninitiated, because xHCI driver sets hcc_params in xhci_gen_setup() called from usb_add_hcd(). This patch checks the Maximum Primary Stream Array Size in the hcc_params register after add primary hcd. Signed-off-by: William wu Acked-by: Roger Quadros Fixes: 4ac53087d6d4 ("usb: xhci: plat: Create both HCDs before adding them") Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index ed56bf9ed885..abe360684f0b 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -223,9 +223,6 @@ static int xhci_plat_probe(struct platform_device *pdev) if (device_property_read_bool(&pdev->dev, "usb3-lpm-capable")) xhci->quirks |= XHCI_LPM_SUPPORT; - if (HCC_MAX_PSA(xhci->hcc_params) >= 4) - xhci->shared_hcd->can_do_streams = 1; - hcd->usb_phy = devm_usb_get_phy_by_phandle(&pdev->dev, "usb-phy", 0); if (IS_ERR(hcd->usb_phy)) { ret = PTR_ERR(hcd->usb_phy); @@ -242,6 +239,9 @@ static int xhci_plat_probe(struct platform_device *pdev) if (ret) goto disable_usb_phy; + if (HCC_MAX_PSA(xhci->hcc_params) >= 4) + xhci->shared_hcd->can_do_streams = 1; + ret = usb_add_hcd(xhci->shared_hcd, irq, IRQF_SHARED); if (ret) goto dealloc_usb2_hcd; From 881b5225558e2bad2f02aca4058b269dce0a834d Mon Sep 17 00:00:00 2001 From: Krzysztof Opasiak Date: Mon, 16 Jan 2017 08:40:57 +0100 Subject: [PATCH 103/357] usb: gadget: udc-core: Rescan pending list on driver unbind commit 8236800da115a3e24b9165c573067343f51cf5ea upstream. Since: commit 855ed04a3758 ("usb: gadget: udc-core: independent registration of gadgets and gadget drivers") if we load gadget module but there is no free udc available then it will be stored on a pending gadgets list. $ modprobe g_zero.ko $ modprobe g_ether.ko [] udc-core: couldn't find an available UDC - added [g_ether] to list of pending drivers We scan this list each time when new UDC appears in system. But we can get a free UDC each time after gadget unbind. This commit add scanning of that list directly after unbinding gadget from udc. Thanks to this, when we unload first gadget: $ rmmod g_zero.ko gadget which is pending is automatically attached to that UDC (if name matches). Fixes: 855ed04a3758 ("usb: gadget: udc-core: independent registration of gadgets and gadget drivers") Signed-off-by: Krzysztof Opasiak Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/core.c | 45 ++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 0402177f93cd..d685d82dcf48 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -1080,6 +1080,24 @@ static void usb_udc_nop_release(struct device *dev) dev_vdbg(dev, "%s\n", __func__); } +/* should be called with udc_lock held */ +static int check_pending_gadget_drivers(struct usb_udc *udc) +{ + struct usb_gadget_driver *driver; + int ret = 0; + + list_for_each_entry(driver, &gadget_driver_pending_list, pending) + if (!driver->udc_name || strcmp(driver->udc_name, + dev_name(&udc->dev)) == 0) { + ret = udc_bind_to_driver(udc, driver); + if (ret != -EPROBE_DEFER) + list_del(&driver->pending); + break; + } + + return ret; +} + /** * usb_add_gadget_udc_release - adds a new gadget to the udc class driver list * @parent: the parent device to this udc. Usually the controller driver's @@ -1093,7 +1111,6 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget, void (*release)(struct device *dev)) { struct usb_udc *udc; - struct usb_gadget_driver *driver; int ret = -ENOMEM; udc = kzalloc(sizeof(*udc), GFP_KERNEL); @@ -1136,17 +1153,9 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget, udc->vbus = true; /* pick up one of pending gadget drivers */ - list_for_each_entry(driver, &gadget_driver_pending_list, pending) { - if (!driver->udc_name || strcmp(driver->udc_name, - dev_name(&udc->dev)) == 0) { - ret = udc_bind_to_driver(udc, driver); - if (ret != -EPROBE_DEFER) - list_del(&driver->pending); - if (ret) - goto err5; - break; - } - } + ret = check_pending_gadget_drivers(udc); + if (ret) + goto err5; mutex_unlock(&udc_lock); @@ -1356,14 +1365,22 @@ int usb_gadget_unregister_driver(struct usb_gadget_driver *driver) return -EINVAL; mutex_lock(&udc_lock); - list_for_each_entry(udc, &udc_list, list) + list_for_each_entry(udc, &udc_list, list) { if (udc->driver == driver) { usb_gadget_remove_driver(udc); usb_gadget_set_state(udc->gadget, - USB_STATE_NOTATTACHED); + USB_STATE_NOTATTACHED); + + /* Maybe there is someone waiting for this UDC? */ + check_pending_gadget_drivers(udc); + /* + * For now we ignore bind errors as probably it's + * not a valid reason to fail other's gadget unbind + */ ret = 0; break; } + } if (ret) { list_del(&driver->pending); From 92ee9483bc5fe7d4c2e6730515849467d818f8f7 Mon Sep 17 00:00:00 2001 From: Magnus Lilja Date: Wed, 25 Jan 2017 22:07:59 +0100 Subject: [PATCH 104/357] usb: gadget: udc: fsl: Add missing complete function. commit 5528954a1a0c49c6974ef1b8d6eaceff536204d5 upstream. Commit 304f7e5e1d08 ("usb: gadget: Refactor request completion") removed check if req->req.complete is non-NULL, resulting in a NULL pointer derefence and a kernel panic. This patch adds an empty complete function instead of re-introducing the req->req.complete check. Fixes: 304f7e5e1d08 ("usb: gadget: Refactor request completion") Signed-off-by: Magnus Lilja Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/fsl_udc_core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c index aab5221d6c2e..aac0ce8aeb0b 100644 --- a/drivers/usb/gadget/udc/fsl_udc_core.c +++ b/drivers/usb/gadget/udc/fsl_udc_core.c @@ -1249,6 +1249,12 @@ static const struct usb_gadget_ops fsl_gadget_ops = { .udc_stop = fsl_udc_stop, }; +/* + * Empty complete function used by this driver to fill in the req->complete + * field when creating a request since the complete field is mandatory. + */ +static void fsl_noop_complete(struct usb_ep *ep, struct usb_request *req) { } + /* Set protocol stall on ep0, protocol stall will automatically be cleared on new transaction */ static void ep0stall(struct fsl_udc *udc) @@ -1283,7 +1289,7 @@ static int ep0_prime_status(struct fsl_udc *udc, int direction) req->req.length = 0; req->req.status = -EINPROGRESS; req->req.actual = 0; - req->req.complete = NULL; + req->req.complete = fsl_noop_complete; req->dtd_count = 0; ret = usb_gadget_map_request(&ep->udc->gadget, &req->req, ep_is_in(ep)); @@ -1366,7 +1372,7 @@ static void ch9getstatus(struct fsl_udc *udc, u8 request_type, u16 value, req->req.length = 2; req->req.status = -EINPROGRESS; req->req.actual = 0; - req->req.complete = NULL; + req->req.complete = fsl_noop_complete; req->dtd_count = 0; ret = usb_gadget_map_request(&ep->udc->gadget, &req->req, ep_is_in(ep)); From b6092a57150c1641dffb6bfeebbe2cbde7275d1f Mon Sep 17 00:00:00 2001 From: Krzysztof Opasiak Date: Thu, 19 Jan 2017 18:55:27 +0100 Subject: [PATCH 105/357] usb: gadget: f_hid: fix: Free out requests commit 20d2ca955bd09639c7b01db5761d157c297aea0a upstream. Requests for out endpoint are allocated in bind() function but never released. This commit ensures that all pending requests are released when we disable out endpoint. Fixes: 99c515005857 ("usb: gadget: hidg: register OUT INT endpoint for SET_REPORT") Tested-by: David Lechner Signed-off-by: Krzysztof Opasiak Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_hid.c | 36 +++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index e2966f87c860..ceb1d0cc5c90 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -371,20 +371,36 @@ static inline struct usb_request *hidg_alloc_ep_req(struct usb_ep *ep, static void hidg_set_report_complete(struct usb_ep *ep, struct usb_request *req) { struct f_hidg *hidg = (struct f_hidg *) req->context; + struct usb_composite_dev *cdev = hidg->func.config->cdev; struct f_hidg_req_list *req_list; unsigned long flags; - req_list = kzalloc(sizeof(*req_list), GFP_ATOMIC); - if (!req_list) + switch (req->status) { + case 0: + req_list = kzalloc(sizeof(*req_list), GFP_ATOMIC); + if (!req_list) { + ERROR(cdev, "Unable to allocate mem for req_list\n"); + goto free_req; + } + + req_list->req = req; + + spin_lock_irqsave(&hidg->spinlock, flags); + list_add_tail(&req_list->list, &hidg->completed_out_req); + spin_unlock_irqrestore(&hidg->spinlock, flags); + + wake_up(&hidg->read_queue); + break; + default: + ERROR(cdev, "Set report failed %d\n", req->status); + /* FALLTHROUGH */ + case -ECONNABORTED: /* hardware forced ep reset */ + case -ECONNRESET: /* request dequeued */ + case -ESHUTDOWN: /* disconnect from host */ +free_req: + free_ep_req(ep, req); return; - - req_list->req = req; - - spin_lock_irqsave(&hidg->spinlock, flags); - list_add_tail(&req_list->list, &hidg->completed_out_req); - spin_unlock_irqrestore(&hidg->spinlock, flags); - - wake_up(&hidg->read_queue); + } } static int hidg_setup(struct usb_function *f, From d3acd94c0f79387b0d1855bdd4690f1596eb3f25 Mon Sep 17 00:00:00 2001 From: Krzysztof Opasiak Date: Thu, 19 Jan 2017 18:55:28 +0100 Subject: [PATCH 106/357] usb: gadget: f_hid: fix: Prevent accessing released memory commit aa65d11aa008f4de58a9cee7e121666d9d68505e upstream. When we unlock our spinlock to copy data to user we may get disabled by USB host and free the whole list of completed out requests including the one from which we are copying the data to user memory. To prevent from this let's remove our working element from the list and place it back only if there is sth left when we finish with it. Fixes: 99c515005857 ("usb: gadget: hidg: register OUT INT endpoint for SET_REPORT") Tested-by: David Lechner Signed-off-by: Krzysztof Opasiak Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_hid.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index ceb1d0cc5c90..b914e5725387 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -223,6 +223,13 @@ static ssize_t f_hidg_read(struct file *file, char __user *buffer, /* pick the first one */ list = list_first_entry(&hidg->completed_out_req, struct f_hidg_req_list, list); + + /* + * Remove this from list to protect it from beign free() + * while host disables our function + */ + list_del(&list->list); + req = list->req; count = min_t(unsigned int, count, req->actual - list->pos); spin_unlock_irqrestore(&hidg->spinlock, flags); @@ -238,15 +245,20 @@ static ssize_t f_hidg_read(struct file *file, char __user *buffer, * call, taking into account its current read position. */ if (list->pos == req->actual) { - spin_lock_irqsave(&hidg->spinlock, flags); - list_del(&list->list); kfree(list); - spin_unlock_irqrestore(&hidg->spinlock, flags); req->length = hidg->report_length; ret = usb_ep_queue(hidg->out_ep, req, GFP_KERNEL); - if (ret < 0) + if (ret < 0) { + free_ep_req(hidg->out_ep, req); return ret; + } + } else { + spin_lock_irqsave(&hidg->spinlock, flags); + list_add(&list->list, &hidg->completed_out_req); + spin_unlock_irqrestore(&hidg->spinlock, flags); + + wake_up(&hidg->read_queue); } return count; @@ -506,14 +518,18 @@ static void hidg_disable(struct usb_function *f) { struct f_hidg *hidg = func_to_hidg(f); struct f_hidg_req_list *list, *next; + unsigned long flags; usb_ep_disable(hidg->in_ep); usb_ep_disable(hidg->out_ep); + spin_lock_irqsave(&hidg->spinlock, flags); list_for_each_entry_safe(list, next, &hidg->completed_out_req, list) { + free_ep_req(hidg->out_ep, list->req); list_del(&list->list); kfree(list); } + spin_unlock_irqrestore(&hidg->spinlock, flags); } static int hidg_set_alt(struct usb_function *f, unsigned intf, unsigned alt) From 6d0511ed15db30965f8a3be8f0733bb6efd2b95e Mon Sep 17 00:00:00 2001 From: Krzysztof Opasiak Date: Thu, 19 Jan 2017 18:55:29 +0100 Subject: [PATCH 107/357] usb: gadget: f_hid: Use spinlock instead of mutex commit 33e4c1a9987a1fc3b42c3b534100b5b006d55c61 upstream. As IN request has to be allocated in set_alt() and released in disable() we cannot use mutex to protect it as we cannot sleep in those funcitons. Let's replace this mutex with a spinlock. Tested-by: David Lechner Signed-off-by: Krzysztof Opasiak Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_hid.c | 57 +++++++++++++++++------------ 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index b914e5725387..b0f71957d00b 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -50,12 +50,12 @@ struct f_hidg { /* recv report */ struct list_head completed_out_req; - spinlock_t spinlock; + spinlock_t read_spinlock; wait_queue_head_t read_queue; unsigned int qlen; /* send report */ - struct mutex lock; + spinlock_t write_spinlock; bool write_pending; wait_queue_head_t write_queue; struct usb_request *req; @@ -204,20 +204,20 @@ static ssize_t f_hidg_read(struct file *file, char __user *buffer, if (!access_ok(VERIFY_WRITE, buffer, count)) return -EFAULT; - spin_lock_irqsave(&hidg->spinlock, flags); + spin_lock_irqsave(&hidg->read_spinlock, flags); #define READ_COND (!list_empty(&hidg->completed_out_req)) /* wait for at least one buffer to complete */ while (!READ_COND) { - spin_unlock_irqrestore(&hidg->spinlock, flags); + spin_unlock_irqrestore(&hidg->read_spinlock, flags); if (file->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(hidg->read_queue, READ_COND)) return -ERESTARTSYS; - spin_lock_irqsave(&hidg->spinlock, flags); + spin_lock_irqsave(&hidg->read_spinlock, flags); } /* pick the first one */ @@ -232,7 +232,7 @@ static ssize_t f_hidg_read(struct file *file, char __user *buffer, req = list->req; count = min_t(unsigned int, count, req->actual - list->pos); - spin_unlock_irqrestore(&hidg->spinlock, flags); + spin_unlock_irqrestore(&hidg->read_spinlock, flags); /* copy to user outside spinlock */ count -= copy_to_user(buffer, req->buf + list->pos, count); @@ -254,9 +254,9 @@ static ssize_t f_hidg_read(struct file *file, char __user *buffer, return ret; } } else { - spin_lock_irqsave(&hidg->spinlock, flags); + spin_lock_irqsave(&hidg->read_spinlock, flags); list_add(&list->list, &hidg->completed_out_req); - spin_unlock_irqrestore(&hidg->spinlock, flags); + spin_unlock_irqrestore(&hidg->read_spinlock, flags); wake_up(&hidg->read_queue); } @@ -267,13 +267,16 @@ static ssize_t f_hidg_read(struct file *file, char __user *buffer, static void f_hidg_req_complete(struct usb_ep *ep, struct usb_request *req) { struct f_hidg *hidg = (struct f_hidg *)ep->driver_data; + unsigned long flags; if (req->status != 0) { ERROR(hidg->func.config->cdev, "End Point Request ERROR: %d\n", req->status); } + spin_lock_irqsave(&hidg->write_spinlock, flags); hidg->write_pending = 0; + spin_unlock_irqrestore(&hidg->write_spinlock, flags); wake_up(&hidg->write_queue); } @@ -281,18 +284,19 @@ static ssize_t f_hidg_write(struct file *file, const char __user *buffer, size_t count, loff_t *offp) { struct f_hidg *hidg = file->private_data; + unsigned long flags; ssize_t status = -ENOMEM; if (!access_ok(VERIFY_READ, buffer, count)) return -EFAULT; - mutex_lock(&hidg->lock); + spin_lock_irqsave(&hidg->write_spinlock, flags); #define WRITE_COND (!hidg->write_pending) /* write queue */ while (!WRITE_COND) { - mutex_unlock(&hidg->lock); + spin_unlock_irqrestore(&hidg->write_spinlock, flags); if (file->f_flags & O_NONBLOCK) return -EAGAIN; @@ -300,17 +304,20 @@ static ssize_t f_hidg_write(struct file *file, const char __user *buffer, hidg->write_queue, WRITE_COND)) return -ERESTARTSYS; - mutex_lock(&hidg->lock); + spin_lock_irqsave(&hidg->write_spinlock, flags); } + hidg->write_pending = 1; count = min_t(unsigned, count, hidg->report_length); + + spin_unlock_irqrestore(&hidg->write_spinlock, flags); status = copy_from_user(hidg->req->buf, buffer, count); if (status != 0) { ERROR(hidg->func.config->cdev, "copy_from_user error\n"); - mutex_unlock(&hidg->lock); - return -EINVAL; + status = -EINVAL; + goto release_write_pending; } hidg->req->status = 0; @@ -318,19 +325,23 @@ static ssize_t f_hidg_write(struct file *file, const char __user *buffer, hidg->req->length = count; hidg->req->complete = f_hidg_req_complete; hidg->req->context = hidg; - hidg->write_pending = 1; status = usb_ep_queue(hidg->in_ep, hidg->req, GFP_ATOMIC); if (status < 0) { ERROR(hidg->func.config->cdev, "usb_ep_queue error on int endpoint %zd\n", status); - hidg->write_pending = 0; - wake_up(&hidg->write_queue); + goto release_write_pending; } else { status = count; } - mutex_unlock(&hidg->lock); + return status; +release_write_pending: + spin_lock_irqsave(&hidg->write_spinlock, flags); + hidg->write_pending = 0; + spin_unlock_irqrestore(&hidg->write_spinlock, flags); + + wake_up(&hidg->write_queue); return status; } @@ -397,9 +408,9 @@ static void hidg_set_report_complete(struct usb_ep *ep, struct usb_request *req) req_list->req = req; - spin_lock_irqsave(&hidg->spinlock, flags); + spin_lock_irqsave(&hidg->read_spinlock, flags); list_add_tail(&req_list->list, &hidg->completed_out_req); - spin_unlock_irqrestore(&hidg->spinlock, flags); + spin_unlock_irqrestore(&hidg->read_spinlock, flags); wake_up(&hidg->read_queue); break; @@ -523,13 +534,13 @@ static void hidg_disable(struct usb_function *f) usb_ep_disable(hidg->in_ep); usb_ep_disable(hidg->out_ep); - spin_lock_irqsave(&hidg->spinlock, flags); + spin_lock_irqsave(&hidg->read_spinlock, flags); list_for_each_entry_safe(list, next, &hidg->completed_out_req, list) { free_ep_req(hidg->out_ep, list->req); list_del(&list->list); kfree(list); } - spin_unlock_irqrestore(&hidg->spinlock, flags); + spin_unlock_irqrestore(&hidg->read_spinlock, flags); } static int hidg_set_alt(struct usb_function *f, unsigned intf, unsigned alt) @@ -678,8 +689,8 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f) if (status) goto fail; - mutex_init(&hidg->lock); - spin_lock_init(&hidg->spinlock); + spin_lock_init(&hidg->write_spinlock); + spin_lock_init(&hidg->read_spinlock); init_waitqueue_head(&hidg->write_queue); init_waitqueue_head(&hidg->read_queue); INIT_LIST_HEAD(&hidg->completed_out_req); From 879dc37cc3a81c99527410a83ae9cc35c637e8b1 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 7 Dec 2016 01:16:25 -0800 Subject: [PATCH 108/357] hv: allocate synic pages for all present CPUs commit 421b8f20d3c381b215f988b42428f56fc3b82405 upstream. It may happen that not all CPUs are online when we do hv_synic_alloc() and in case more CPUs come online later we may try accessing these allocated structures. Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 60dbd6cb4640..e4bb498e41ae 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -411,7 +411,7 @@ int hv_synic_alloc(void) goto err; } - for_each_online_cpu(cpu) { + for_each_present_cpu(cpu) { hv_context.event_dpc[cpu] = kmalloc(size, GFP_ATOMIC); if (hv_context.event_dpc[cpu] == NULL) { pr_err("Unable to allocate event dpc\n"); @@ -482,7 +482,7 @@ void hv_synic_free(void) int cpu; kfree(hv_context.hv_numa_map); - for_each_online_cpu(cpu) + for_each_present_cpu(cpu) hv_synic_free_cpu(cpu); } From 6e936b06795ae5dcb5248e0c4344d8303de4c2db Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 7 Dec 2016 01:16:26 -0800 Subject: [PATCH 109/357] hv: init percpu_list in hv_synic_alloc() commit 3c7630d35009e6635e5b58d62de554fd5b6db5df upstream. Initializing hv_context.percpu_list in hv_synic_alloc() helps to prevent a crash in percpu_channel_enq() when not all CPUs were online during initialization and it naturally belongs there. Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index e4bb498e41ae..a2567a4be1a8 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -457,6 +457,8 @@ int hv_synic_alloc(void) pr_err("Unable to allocate post msg page\n"); goto err; } + + INIT_LIST_HEAD(&hv_context.percpu_list[cpu]); } return 0; @@ -552,8 +554,6 @@ void hv_synic_init(void *arg) rdmsrl(HV_X64_MSR_VP_INDEX, vp_index); hv_context.vp_index[cpu] = (u32)vp_index; - INIT_LIST_HEAD(&hv_context.percpu_list[cpu]); - /* * Register the per-cpu clockevent source. */ From 664f72a17f2c072175a0083388f315f811ff1acb Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 7 Dec 2016 01:16:27 -0800 Subject: [PATCH 110/357] hv: don't reset hv_context.tsc_page on crash commit 56ef6718a1d8d77745033c5291e025ce18504159 upstream. It may happen that secondary CPUs are still alive and resetting hv_context.tsc_page will cause a consequent crash in read_hv_clock_tsc() as we don't check for it being not NULL there. It is safe as we're not freeing this page anyways. Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index a2567a4be1a8..6e49a4dd99c0 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -309,9 +309,10 @@ void hv_cleanup(bool crash) hypercall_msr.as_uint64 = 0; wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64); - if (!crash) + if (!crash) { vfree(hv_context.tsc_page); - hv_context.tsc_page = NULL; + hv_context.tsc_page = NULL; + } } #endif } From e44eab51e74c59a0b01c1fce2159c82b3c8a31b0 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 7 Dec 2016 01:16:28 -0800 Subject: [PATCH 111/357] Drivers: hv: vmbus: Prevent sending data on a rescinded channel commit e7e97dd8b77ee7366f2f8c70a033bf5fa05ec2e0 upstream. After the channel is rescinded, the host does not read from the rescinded channel. Fail writes to a channel that has already been rescinded. If we permit writes on a rescinded channel, since the host will not respond we will have situations where we will be unable to unload vmbus drivers that cannot have any outstanding requests to the host at the point they are unoaded. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/ring_buffer.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 308dbda700eb..e94ed1c22c8b 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -298,6 +298,9 @@ int hv_ringbuffer_write(struct vmbus_channel *channel, unsigned long flags = 0; struct hv_ring_buffer_info *outring_info = &channel->outbound; + if (channel->rescind) + return -ENODEV; + for (i = 0; i < kv_count; i++) totalbytes_towrite += kv_list[i].iov_len; @@ -350,6 +353,10 @@ int hv_ringbuffer_write(struct vmbus_channel *channel, spin_unlock_irqrestore(&outring_info->ring_lock, flags); hv_signal_on_write(old_write, channel, kick_q); + + if (channel->rescind) + return -ENODEV; + return 0; } From 728fe696fd3ee551c24e02e7826dd9bdb89f1d47 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 22 Dec 2016 16:54:00 -0800 Subject: [PATCH 112/357] Drivers: hv: vmbus: Fix a rescind handling bug commit ccb61f8a99e6c29df4fb96a65dad4fad740d5be9 upstream. The host can rescind a channel that has been offered to the guest and once the channel is rescinded, the host does not respond to any requests on that channel. Deal with the case where the guest may be blocked waiting for a response from the host. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 18 ++++++++++++++++++ drivers/hv/channel_mgmt.c | 25 +++++++++++++++++++++++++ include/linux/hyperv.h | 1 + 3 files changed, 44 insertions(+) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index d5b8d9fd50bb..be34547cdb68 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -157,6 +157,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, } init_completion(&open_info->waitevent); + open_info->waiting_channel = newchannel; open_msg = (struct vmbus_channel_open_channel *)open_info->msg; open_msg->header.msgtype = CHANNELMSG_OPENCHANNEL; @@ -194,6 +195,11 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, list_del(&open_info->msglistentry); spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); + if (newchannel->rescind) { + err = -ENODEV; + goto error_free_gpadl; + } + if (open_info->response.open_result.status) { err = -EAGAIN; goto error_free_gpadl; @@ -405,6 +411,7 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, return ret; init_completion(&msginfo->waitevent); + msginfo->waiting_channel = channel; gpadlmsg = (struct vmbus_channel_gpadl_header *)msginfo->msg; gpadlmsg->header.msgtype = CHANNELMSG_GPADL_HEADER; @@ -441,6 +448,11 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, } wait_for_completion(&msginfo->waitevent); + if (channel->rescind) { + ret = -ENODEV; + goto cleanup; + } + /* At this point, we received the gpadl created msg */ *gpadl_handle = gpadlmsg->gpadl; @@ -474,6 +486,7 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle) return -ENOMEM; init_completion(&info->waitevent); + info->waiting_channel = channel; msg = (struct vmbus_channel_gpadl_teardown *)info->msg; @@ -493,6 +506,11 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle) wait_for_completion(&info->waitevent); + if (channel->rescind) { + ret = -ENODEV; + goto post_msg_err; + } + post_msg_err: spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); list_del(&info->msglistentry); diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index f3a8b52acb51..cb9531541a12 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -147,6 +147,29 @@ static const struct { { HV_RDV_GUID }, }; +/* + * The rescinded channel may be blocked waiting for a response from the host; + * take care of that. + */ +static void vmbus_rescind_cleanup(struct vmbus_channel *channel) +{ + struct vmbus_channel_msginfo *msginfo; + unsigned long flags; + + + spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); + + list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, + msglistentry) { + + if (msginfo->waiting_channel == channel) { + complete(&msginfo->waitevent); + break; + } + } + spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); +} + static bool is_unsupported_vmbus_devs(const uuid_le *guid) { int i; @@ -825,6 +848,8 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) channel->rescind = true; spin_unlock_irqrestore(&channel->lock, flags); + vmbus_rescind_cleanup(channel); + if (channel->device_obj) { if (channel->chn_rescind_callback) { channel->chn_rescind_callback(channel); diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index c92a083bcf16..192eef2fd766 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -641,6 +641,7 @@ struct vmbus_channel_msginfo { /* Synchronize the request/response if needed */ struct completion waitevent; + struct vmbus_channel *waiting_channel; union { struct vmbus_channel_version_supported version_supported; struct vmbus_channel_open_result open_result; From d4d5b507c6427b417f4ff5411649e55af4447c84 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 22 Dec 2016 16:54:01 -0800 Subject: [PATCH 113/357] Drivers: hv: util: kvp: Fix a rescind processing issue commit 5a66fecbf6aa528e375cbebccb1061cc58d80c84 upstream. KVP may use a char device to support the communication between the user level daemon and the driver. When the KVP channel is rescinded we need to make sure that the char device is fully cleaned up before we can process a new KVP offer from the host. Implement this logic. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_kvp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index 5e1fdc8d32ab..3abfc5983c97 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c @@ -88,6 +88,7 @@ static DECLARE_WORK(kvp_sendkey_work, kvp_send_key); static const char kvp_devname[] = "vmbus/hv_kvp"; static u8 *recv_buffer; static struct hvutil_transport *hvt; +static struct completion release_event; /* * Register the kernel component with the user-level daemon. * As part of this registration, pass the LIC version number. @@ -716,6 +717,7 @@ static void kvp_on_reset(void) if (cancel_delayed_work_sync(&kvp_timeout_work)) kvp_respond_to_host(NULL, HV_E_FAIL); kvp_transaction.state = HVUTIL_DEVICE_INIT; + complete(&release_event); } int @@ -724,6 +726,7 @@ hv_kvp_init(struct hv_util_service *srv) recv_buffer = srv->recv_buffer; kvp_transaction.recv_channel = srv->channel; + init_completion(&release_event); /* * When this driver loads, the user level daemon that * processes the host requests may not yet be running. @@ -747,4 +750,5 @@ void hv_kvp_deinit(void) cancel_delayed_work_sync(&kvp_timeout_work); cancel_work_sync(&kvp_sendkey_work); hvutil_transport_destroy(hvt); + wait_for_completion(&release_event); } From 4db47d9bf99d8bc3acaa9fd4b6437ea856a2f037 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 22 Dec 2016 16:54:02 -0800 Subject: [PATCH 114/357] Drivers: hv: util: Fcopy: Fix a rescind processing issue commit 20951c7535b5e6af46bc37b7142105f716df739c upstream. Fcopy may use a char device to support the communication between the user level daemon and the driver. When the Fcopy channel is rescinded we need to make sure that the char device is fully cleaned up before we can process a new Fcopy offer from the host. Implement this logic. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_fcopy.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c index 8b2ba98831ec..e47d8c9db03a 100644 --- a/drivers/hv/hv_fcopy.c +++ b/drivers/hv/hv_fcopy.c @@ -61,6 +61,7 @@ static DECLARE_WORK(fcopy_send_work, fcopy_send_data); static const char fcopy_devname[] = "vmbus/hv_fcopy"; static u8 *recv_buffer; static struct hvutil_transport *hvt; +static struct completion release_event; /* * This state maintains the version number registered by the daemon. */ @@ -317,6 +318,7 @@ static void fcopy_on_reset(void) if (cancel_delayed_work_sync(&fcopy_timeout_work)) fcopy_respond_to_host(HV_E_FAIL); + complete(&release_event); } int hv_fcopy_init(struct hv_util_service *srv) @@ -324,6 +326,7 @@ int hv_fcopy_init(struct hv_util_service *srv) recv_buffer = srv->recv_buffer; fcopy_transaction.recv_channel = srv->channel; + init_completion(&release_event); /* * When this driver loads, the user level daemon that * processes the host requests may not yet be running. @@ -345,4 +348,5 @@ void hv_fcopy_deinit(void) fcopy_transaction.state = HVUTIL_DEVICE_DYING; cancel_delayed_work_sync(&fcopy_timeout_work); hvutil_transport_destroy(hvt); + wait_for_completion(&release_event); } From fbff994b8a464cd5de50fbb2e0b8341becb70336 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 22 Dec 2016 16:54:03 -0800 Subject: [PATCH 115/357] Drivers: hv: util: Backup: Fix a rescind processing issue commit d77044d142e960f7b5f814a91ecb8bcf86aa552c upstream. VSS may use a char device to support the communication between the user level daemon and the driver. When the VSS channel is rescinded we need to make sure that the char device is fully cleaned up before we can process a new VSS offer from the host. Implement this logic. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_snapshot.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c index a6707133c297..a76e3db0d01f 100644 --- a/drivers/hv/hv_snapshot.c +++ b/drivers/hv/hv_snapshot.c @@ -66,6 +66,7 @@ static int dm_reg_value; static const char vss_devname[] = "vmbus/hv_vss"; static __u8 *recv_buffer; static struct hvutil_transport *hvt; +static struct completion release_event; static void vss_timeout_func(struct work_struct *dummy); static void vss_handle_request(struct work_struct *dummy); @@ -330,11 +331,13 @@ static void vss_on_reset(void) if (cancel_delayed_work_sync(&vss_timeout_work)) vss_respond_to_host(HV_E_FAIL); vss_transaction.state = HVUTIL_DEVICE_INIT; + complete(&release_event); } int hv_vss_init(struct hv_util_service *srv) { + init_completion(&release_event); if (vmbus_proto_version < VERSION_WIN8_1) { pr_warn("Integration service 'Backup (volume snapshot)'" " not supported on this host version.\n"); @@ -365,4 +368,5 @@ void hv_vss_deinit(void) cancel_delayed_work_sync(&vss_timeout_work); cancel_work_sync(&vss_handle_request_work); hvutil_transport_destroy(hvt); + wait_for_completion(&release_event); } From 50fc62d5eeb3392f11161637b5ed4e0d340fed74 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 22 Dec 2016 18:07:52 -0700 Subject: [PATCH 116/357] RDMA/core: Fix incorrect structure packing for booleans commit 55efcfcd7776165b294f8b5cd6e05ca00ec89b7c upstream. The RDMA core uses ib_pack() to convert from unpacked CPU structs to on-the-wire bitpacked structs. This process requires that 1 bit fields are declared as u8 in the unpacked struct, otherwise the packing process does not read the value properly and the packed result is wired to 0. Several places wrongly used int. Crucially this means the kernel has never, set reversible correctly in the path record request. It has always asked for irreversible paths even if the ULP requests otherwise. When the kernel is used with a SM that supports this feature, it completely breaks communication management if reversible paths are not properly requested. The only reason this ever worked is because opensm ignores the reversible bit. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Gunthorpe Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- include/rdma/ib_sa.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 5ee7aab95eb8..fd0e53219f93 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -153,12 +153,12 @@ struct ib_sa_path_rec { union ib_gid sgid; __be16 dlid; __be16 slid; - int raw_traffic; + u8 raw_traffic; /* reserved */ __be32 flow_label; u8 hop_limit; u8 traffic_class; - int reversible; + u8 reversible; u8 numb_path; __be16 pkey; __be16 qos_class; @@ -220,7 +220,7 @@ struct ib_sa_mcmember_rec { u8 hop_limit; u8 scope; u8 join_state; - int proxy_join; + u8 proxy_join; }; /* Service Record Component Mask Sec 15.2.5.14 Ver 1.1 */ From c8cdd9234caced510db1a03491af475938f9855a Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 21 Feb 2017 11:21:57 -0800 Subject: [PATCH 117/357] rdma_cm: fail iwarp accepts w/o connection params commit f2625f7db4dd0bbd16a9c7d2950e7621f9aa57ad upstream. cma_accept_iw() needs to return an error if conn_params is NULL. Since this is coming from user space, we can crash. Reported-by: Shaobo He Acked-by: Sean Hefty Signed-off-by: Steve Wise Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/cma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index c25768c2dd3b..f2d40c05ef9e 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -3540,6 +3540,9 @@ static int cma_accept_iw(struct rdma_id_private *id_priv, struct iw_cm_conn_param iw_param; int ret; + if (!conn_param) + return -EINVAL; + ret = cma_modify_qp_rtr(id_priv, conn_param); if (ret) return ret; From 4cf918c804c657c3a8b9cd4bb46171315bdfad62 Mon Sep 17 00:00:00 2001 From: Andrew Price Date: Wed, 22 Feb 2017 12:05:03 -0500 Subject: [PATCH 118/357] gfs2: Add missing rcu locking for glock lookup commit f38e5fb95a1f8feda88531eedc98f69b24748712 upstream. We must hold the rcu read lock across looking up glocks and trying to bump their refcount to prevent the glocks from being freed in between. Signed-off-by: Andrew Price Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/glock.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 14cbf60167a7..133f322573b5 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -658,9 +658,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, struct kmem_cache *cachep; int ret, tries = 0; + rcu_read_lock(); gl = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms); if (gl && !lockref_get_not_dead(&gl->gl_lockref)) gl = NULL; + rcu_read_unlock(); *glp = gl; if (gl) @@ -728,15 +730,18 @@ again: if (ret == -EEXIST) { ret = 0; + rcu_read_lock(); tmp = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms); if (tmp == NULL || !lockref_get_not_dead(&tmp->gl_lockref)) { if (++tries < 100) { + rcu_read_unlock(); cond_resched(); goto again; } tmp = NULL; ret = -ENOMEM; } + rcu_read_unlock(); } else { WARN_ON_ONCE(ret); } From 24d77f99a7b74b4b68682d54ee79992b8aed7421 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 27 Jan 2017 02:06:36 -0800 Subject: [PATCH 119/357] remoteproc: qcom: mdt_loader: Don't overwrite firmware object commit 3e8b571a9a0881ba3381ca0915995696da145ab8 upstream. The "fw" firmware object is passed from the remoteproc core and should not be overwritten, as that results in leaked buffers and a double free of the the last firmware object. Fixes: 051fb70fd4ea ("remoteproc: qcom: Driver for the self-authenticating Hexagon v5") Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- drivers/remoteproc/qcom_mdt_loader.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/remoteproc/qcom_mdt_loader.c b/drivers/remoteproc/qcom_mdt_loader.c index 114e8e4cef67..04db02d9059d 100644 --- a/drivers/remoteproc/qcom_mdt_loader.c +++ b/drivers/remoteproc/qcom_mdt_loader.c @@ -115,6 +115,7 @@ int qcom_mdt_load(struct rproc *rproc, const struct elf32_phdr *phdrs; const struct elf32_phdr *phdr; const struct elf32_hdr *ehdr; + const struct firmware *seg_fw; size_t fw_name_len; char *fw_name; void *ptr; @@ -153,16 +154,16 @@ int qcom_mdt_load(struct rproc *rproc, if (phdr->p_filesz) { sprintf(fw_name + fw_name_len - 3, "b%02d", i); - ret = request_firmware(&fw, fw_name, &rproc->dev); + ret = request_firmware(&seg_fw, fw_name, &rproc->dev); if (ret) { dev_err(&rproc->dev, "failed to load %s\n", fw_name); break; } - memcpy(ptr, fw->data, fw->size); + memcpy(ptr, seg_fw->data, seg_fw->size); - release_firmware(fw); + release_firmware(seg_fw); } if (phdr->p_memsz > phdr->p_filesz) From d56dd01bc291962f5fb4b53f6ed6de852af0a0e3 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Wed, 28 Dec 2016 15:40:04 -0600 Subject: [PATCH 120/357] rtlwifi: Fix alignment issues commit 40b368af4b750863b2cb66a3a9513241db2f0793 upstream. The addresses of Wlan NIC registers are natural alignment, but some drivers have bugs. These are evident on platforms that need natural alignment to access registers. This change contains the following: 1. Function _rtl8821ae_dbi_read() is used to read one byte from DBI, thus it should use rtl_read_byte(). 2. Register 0x4C7 of 8192ee is single byte. Signed-off-by: Ping-Ke Shih Signed-off-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c | 2 +- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c index ebf663e1a81a..cab4601eba8e 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c @@ -1006,7 +1006,7 @@ static void _rtl92ee_hw_configure(struct ieee80211_hw *hw) rtl_write_word(rtlpriv, REG_SIFS_TRX, 0x100a); /* Note Data sheet don't define */ - rtl_write_word(rtlpriv, 0x4C7, 0x80); + rtl_write_byte(rtlpriv, 0x4C7, 0x80); rtl_write_byte(rtlpriv, REG_RX_PKT_LIMIT, 0x20); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c index 1281ebe0c30a..2cbef9647acc 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c @@ -1128,7 +1128,7 @@ static u8 _rtl8821ae_dbi_read(struct rtl_priv *rtlpriv, u16 addr) } if (0 == tmp) { read_addr = REG_DBI_RDATA + addr % 4; - ret = rtl_read_word(rtlpriv, read_addr); + ret = rtl_read_byte(rtlpriv, read_addr); } return ret; } From 5ea52fac0d827452cd2887722580ab58fdbd2d7d Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sun, 5 Feb 2017 10:24:22 -0600 Subject: [PATCH 121/357] rtlwifi: rtl8192c-common: Fix "BUG: KASAN: commit 6773386f977ce5af339f9678fa2918909a946c6b upstream. Kernels built with CONFIG_KASAN=y report the following BUG for rtl8192cu and rtl8192c-common: ================================================================== BUG: KASAN: slab-out-of-bounds in rtl92c_dm_bt_coexist+0x858/0x1e40 [rtl8192c_common] at addr ffff8801c90edb08 Read of size 1 by task kworker/0:1/38 page:ffffea0007243800 count:1 mapcount:0 mapping: (null) index:0x0 compound_mapcount: 0 flags: 0x8000000000004000(head) page dumped because: kasan: bad access detected CPU: 0 PID: 38 Comm: kworker/0:1 Not tainted 4.9.7-gentoo #3 Hardware name: Gigabyte Technology Co., Ltd. To be filled by O.E.M./Z77-DS3H, BIOS F11a 11/13/2013 Workqueue: rtl92c_usb rtl_watchdog_wq_callback [rtlwifi] 0000000000000000 ffffffff829eea33 ffff8801d7f0fa30 ffff8801c90edb08 ffffffff824c0f09 ffff8801d4abee80 0000000000000004 0000000000000297 ffffffffc070b57c ffff8801c7aa7c48 ffff880100000004 ffffffff000003e8 Call Trace: [] ? dump_stack+0x5c/0x79 [] ? kasan_report_error+0x4b9/0x4e0 [] ? _usb_read_sync+0x15c/0x280 [rtl_usb] [] ? __asan_report_load1_noabort+0x45/0x50 [] ? rtl92c_dm_bt_coexist+0x858/0x1e40 [rtl8192c_common] [] ? rtl92c_dm_bt_coexist+0x858/0x1e40 [rtl8192c_common] [] ? rtl92c_dm_rf_saving+0x96e/0x1330 [rtl8192c_common] ... The problem is due to rtl8192ce and rtl8192cu sharing routines, and having different layouts of struct rtl_pci_priv, which is used by rtl8192ce, and struct rtl_usb_priv, which is used by rtl8192cu. The problem was resolved by placing the struct bt_coexist_info at the head of each of those private areas. Reported-and-tested-by: Dmitry Osipenko Signed-off-by: Larry Finger Cc: Dmitry Osipenko Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/pci.h | 4 ++-- drivers/net/wireless/realtek/rtlwifi/usb.h | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.h b/drivers/net/wireless/realtek/rtlwifi/pci.h index b951ebac15ea..d2f4dd470fdb 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.h +++ b/drivers/net/wireless/realtek/rtlwifi/pci.h @@ -275,10 +275,10 @@ struct mp_adapter { }; struct rtl_pci_priv { + struct bt_coexist_info bt_coexist; + struct rtl_led_ctl ledctl; struct rtl_pci dev; struct mp_adapter ndis_adapter; - struct rtl_led_ctl ledctl; - struct bt_coexist_info bt_coexist; }; #define rtl_pcipriv(hw) (((struct rtl_pci_priv *)(rtl_priv(hw))->priv)) diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.h b/drivers/net/wireless/realtek/rtlwifi/usb.h index 685273ca9561..441c4412130c 100644 --- a/drivers/net/wireless/realtek/rtlwifi/usb.h +++ b/drivers/net/wireless/realtek/rtlwifi/usb.h @@ -150,8 +150,9 @@ struct rtl_usb { }; struct rtl_usb_priv { - struct rtl_usb dev; + struct bt_coexist_info bt_coexist; struct rtl_led_ctl ledctl; + struct rtl_usb dev; }; #define rtl_usbpriv(hw) (((struct rtl_usb_priv *)(rtl_priv(hw))->priv)) From 5af94e637fd8b2b89ea49bfd112b37137fbecb0e Mon Sep 17 00:00:00 2001 From: Stefano Babic Date: Fri, 20 Jan 2017 10:38:20 -0500 Subject: [PATCH 122/357] VME: restore bus_remove function causing incomplete module unload commit 9797484ba83d68f18fe1cbd964b7cd830f78f0f7 upstream. Commit 050c3d52cc7810d9d17b8cd231708609af6876ae ("vme: make core vme support explicitly non-modular") dropped the remove function because it appeared as if it was for removal of the bus, which is not supported. However, vme_bus_remove() is called when a VME device is removed from the bus and not when the bus is removed; as it calls the VME device driver's cleanup function. Without this function, the remove() in the VME device driver is never called and VME device drivers cannot be reloaded again. Here we restore the remove function that was deleted in that commit, and the reference to the function in the bus structure. Fixes: 050c3d52cc78 ("vme: make core vme support explicitly non-modular") Cc: Manohar Vanga Acked-by: Martyn Welch Cc: devel@driverdev.osuosl.org Signed-off-by: Stefano Babic Signed-off-by: Paul Gortmaker Signed-off-by: Greg Kroah-Hartman --- drivers/vme/vme.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/vme/vme.c b/drivers/vme/vme.c index bdbadaa47ef3..0035cf79760a 100644 --- a/drivers/vme/vme.c +++ b/drivers/vme/vme.c @@ -1625,10 +1625,25 @@ static int vme_bus_probe(struct device *dev) return retval; } +static int vme_bus_remove(struct device *dev) +{ + int retval = -ENODEV; + struct vme_driver *driver; + struct vme_dev *vdev = dev_to_vme_dev(dev); + + driver = dev->platform_data; + + if (driver->remove != NULL) + retval = driver->remove(vdev); + + return retval; +} + struct bus_type vme_bus_type = { .name = "vme", .match = vme_bus_match, .probe = vme_bus_probe, + .remove = vme_bus_remove, }; EXPORT_SYMBOL(vme_bus_type); From 9bdd39c146fc4f7da1ceb91ac1ebe80e7f1a8d41 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Feb 2017 17:04:42 -0500 Subject: [PATCH 123/357] nfsd: minor nfsd_setattr cleanup commit 758e99fefe1d9230111296956335cd35995c0eaf upstream. Simplify exit paths, size_change use. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/vfs.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 8ca642fe9b21..186f426a3699 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -377,7 +377,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, __be32 err; int host_err; bool get_write_count; - int size_change = 0; + bool size_change = (iap->ia_valid & ATTR_SIZE); if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; @@ -390,11 +390,11 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, /* Get inode */ err = fh_verify(rqstp, fhp, ftype, accmode); if (err) - goto out; + return err; if (get_write_count) { host_err = fh_want_write(fhp); if (host_err) - return nfserrno(host_err); + goto out; } dentry = fhp->fh_dentry; @@ -405,19 +405,21 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, iap->ia_valid &= ~ATTR_MODE; if (!iap->ia_valid) - goto out; + return 0; nfsd_sanitize_attrs(inode, iap); + if (check_guard && guardtime != inode->i_ctime.tv_sec) + return nfserr_notsync; + /* * The size case is special, it changes the file in addition to the * attributes. */ - if (iap->ia_valid & ATTR_SIZE) { + if (size_change) { err = nfsd_get_write_access(rqstp, fhp, iap); if (err) - goto out; - size_change = 1; + return err; /* * RFC5661, Section 18.30.4: @@ -432,23 +434,16 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, iap->ia_valid |= ATTR_CTIME; - if (check_guard && guardtime != inode->i_ctime.tv_sec) { - err = nfserr_notsync; - goto out_put_write_access; - } - fh_lock(fhp); host_err = notify_change(dentry, iap, NULL); fh_unlock(fhp); - err = nfserrno(host_err); -out_put_write_access: if (size_change) put_write_access(inode); - if (!err) - err = nfserrno(commit_metadata(fhp)); out: - return err; + if (!host_err) + host_err = commit_metadata(fhp); + return nfserrno(host_err); } #if defined(CONFIG_NFSD_V4) From a3c6cbc4eac4473ed5461d5faae2794d3e5c0e44 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Feb 2017 07:21:33 +0100 Subject: [PATCH 124/357] nfsd: special case truncates some more commit 783112f7401ff449d979530209b3f6c2594fdb4e upstream. Both the NFS protocols and the Linux VFS use a setattr operation with a bitmap of attributes to set to set various file attributes including the file size and the uid/gid. The Linux syscalls never mix size updates with unrelated updates like the uid/gid, and some file systems like XFS and GFS2 rely on the fact that truncates don't update random other attributes, and many other file systems handle the case but do not update the other attributes in the same transaction. NFSD on the other hand passes the attributes it gets on the wire more or less directly through to the VFS, leading to updates the file systems don't expect. XFS at least has an assert on the allowed attributes, which caught an unusual NFS client setting the size and group at the same time. To handle this issue properly this splits the notify_change call in nfsd_setattr into two separate ones. Signed-off-by: Christoph Hellwig Tested-by: Chuck Lever Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/vfs.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 186f426a3699..b829cc9a9b39 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -414,13 +414,19 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, /* * The size case is special, it changes the file in addition to the - * attributes. + * attributes, and file systems don't expect it to be mixed with + * "random" attribute changes. We thus split out the size change + * into a separate call to ->setattr, and do the rest as a separate + * setattr call. */ if (size_change) { err = nfsd_get_write_access(rqstp, fhp, iap); if (err) return err; + } + fh_lock(fhp); + if (size_change) { /* * RFC5661, Section 18.30.4: * Changing the size of a file with SETATTR indirectly @@ -428,16 +434,30 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, * * (and similar for the older RFCs) */ - if (iap->ia_size != i_size_read(inode)) - iap->ia_valid |= ATTR_MTIME; + struct iattr size_attr = { + .ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME, + .ia_size = iap->ia_size, + }; + + host_err = notify_change(dentry, &size_attr, NULL); + if (host_err) + goto out_unlock; + iap->ia_valid &= ~ATTR_SIZE; + + /* + * Avoid the additional setattr call below if the only other + * attribute that the client sends is the mtime, as we update + * it as part of the size change above. + */ + if ((iap->ia_valid & ~ATTR_MTIME) == 0) + goto out_unlock; } iap->ia_valid |= ATTR_CTIME; - - fh_lock(fhp); host_err = notify_change(dentry, iap, NULL); - fh_unlock(fhp); +out_unlock: + fh_unlock(fhp); if (size_change) put_write_access(inode); out: From 0465339eb54953f0be1f03e980b07eeb01e16fca Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 8 Feb 2017 11:29:46 -0500 Subject: [PATCH 125/357] NFSv4: Fix memory and state leak in _nfs4_open_and_get_state commit a974deee477af89411e0f80456bfb344ac433c98 upstream. If we exit because the file access check failed, we currently leak the struct nfs4_state. We need to attach it to the open context before returning. Fixes: 3efb9722475e ("NFSv4: Refactor _nfs4_open_and_get_state..") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 78ff8b63d5f7..123105f4be53 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2708,6 +2708,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, ret = PTR_ERR(state); if (IS_ERR(state)) goto out; + ctx->state = state; if (server->caps & NFS_CAP_POSIX_LOCK) set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); if (opendata->o_res.rflags & NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK) @@ -2733,7 +2734,6 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, if (ret != 0) goto out; - ctx->state = state; if (d_inode(dentry) == state->inode) { nfs_inode_attach_open_context(ctx); if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) From 77bbc0c7712a43442e2637247eaa4f01f73f0eb8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 17 Feb 2017 18:42:32 -0500 Subject: [PATCH 126/357] NFSv4: Fix reboot recovery in copy offload commit 9d8cacbf5636657d2cd0dda17438a56d806d3224 upstream. Copy offload code needs to be hooked into the code for handling NFS4ERR_BAD_STATEID by ensuring that we set the "stateid" field in struct nfs4_exception. Reported-by: Olga Kornievskaia Fixes: 2e72448b07dc3 ("NFS: Add COPY nfs operation") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs42proc.c | 63 +++++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 608501971fe0..5cda392028ce 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -128,30 +128,26 @@ out_unlock: return err; } -static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src, +static ssize_t _nfs42_proc_copy(struct file *src, struct nfs_lock_context *src_lock, - struct file *dst, loff_t pos_dst, + struct file *dst, struct nfs_lock_context *dst_lock, - size_t count) + struct nfs42_copy_args *args, + struct nfs42_copy_res *res) { - struct nfs42_copy_args args = { - .src_fh = NFS_FH(file_inode(src)), - .src_pos = pos_src, - .dst_fh = NFS_FH(file_inode(dst)), - .dst_pos = pos_dst, - .count = count, - }; - struct nfs42_copy_res res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY], - .rpc_argp = &args, - .rpc_resp = &res, + .rpc_argp = args, + .rpc_resp = res, }; struct inode *dst_inode = file_inode(dst); struct nfs_server *server = NFS_SERVER(dst_inode); + loff_t pos_src = args->src_pos; + loff_t pos_dst = args->dst_pos; + size_t count = args->count; int status; - status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context, + status = nfs4_set_rw_stateid(&args->src_stateid, src_lock->open_context, src_lock, FMODE_READ); if (status) return status; @@ -161,7 +157,7 @@ static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src, if (status) return status; - status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context, + status = nfs4_set_rw_stateid(&args->dst_stateid, dst_lock->open_context, dst_lock, FMODE_WRITE); if (status) return status; @@ -171,22 +167,22 @@ static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src, return status; status = nfs4_call_sync(server->client, server, &msg, - &args.seq_args, &res.seq_res, 0); + &args->seq_args, &res->seq_res, 0); if (status == -ENOTSUPP) server->caps &= ~NFS_CAP_COPY; if (status) return status; - if (res.write_res.verifier.committed != NFS_FILE_SYNC) { - status = nfs_commit_file(dst, &res.write_res.verifier.verifier); + if (res->write_res.verifier.committed != NFS_FILE_SYNC) { + status = nfs_commit_file(dst, &res->write_res.verifier.verifier); if (status) return status; } truncate_pagecache_range(dst_inode, pos_dst, - pos_dst + res.write_res.count); + pos_dst + res->write_res.count); - return res.write_res.count; + return res->write_res.count; } ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, @@ -196,8 +192,22 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, struct nfs_server *server = NFS_SERVER(file_inode(dst)); struct nfs_lock_context *src_lock; struct nfs_lock_context *dst_lock; - struct nfs4_exception src_exception = { }; - struct nfs4_exception dst_exception = { }; + struct nfs42_copy_args args = { + .src_fh = NFS_FH(file_inode(src)), + .src_pos = pos_src, + .dst_fh = NFS_FH(file_inode(dst)), + .dst_pos = pos_dst, + .count = count, + }; + struct nfs42_copy_res res; + struct nfs4_exception src_exception = { + .inode = file_inode(src), + .stateid = &args.src_stateid, + }; + struct nfs4_exception dst_exception = { + .inode = file_inode(dst), + .stateid = &args.dst_stateid, + }; ssize_t err, err2; if (!nfs_server_capable(file_inode(dst), NFS_CAP_COPY)) @@ -207,7 +217,6 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, if (IS_ERR(src_lock)) return PTR_ERR(src_lock); - src_exception.inode = file_inode(src); src_exception.state = src_lock->open_context->state; dst_lock = nfs_get_lock_context(nfs_file_open_context(dst)); @@ -216,15 +225,17 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, goto out_put_src_lock; } - dst_exception.inode = file_inode(dst); dst_exception.state = dst_lock->open_context->state; do { inode_lock(file_inode(dst)); - err = _nfs42_proc_copy(src, pos_src, src_lock, - dst, pos_dst, dst_lock, count); + err = _nfs42_proc_copy(src, src_lock, + dst, dst_lock, + &args, &res); inode_unlock(file_inode(dst)); + if (err >= 0) + break; if (err == -ENOTSUPP) { err = -EOPNOTSUPP; break; From c65db336d6c6f82e30e0bdb771517297db8e880e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 17 Feb 2017 19:49:09 -0500 Subject: [PATCH 127/357] pNFS/flexfiles: If the layout is invalid, it must be updated before retrying commit df3ab232e462bce20710596d697ade6b72497694 upstream. If we see that our pNFS READ/WRITE/COMMIT operation failed, but we also see that our layout segment is no longer valid, then we need to get a new layout segment before retrying. Fixes: 90816d1ddacf ("NFSv4.1/flexfiles: Don't mark the entire deviceid...") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/flexfilelayout/flexfilelayout.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index a5c38889e7ae..13abd608af0f 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1073,9 +1073,6 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, struct nfs_client *mds_client = mds_server->nfs_client; struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table; - if (task->tk_status >= 0) - return 0; - switch (task->tk_status) { /* MDS state errors */ case -NFS4ERR_DELEG_REVOKED: @@ -1176,9 +1173,6 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task, { struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); - if (task->tk_status >= 0) - return 0; - switch (task->tk_status) { /* File access problems. Don't mark the device as unavailable */ case -EACCES: @@ -1213,6 +1207,13 @@ static int ff_layout_async_handle_error(struct rpc_task *task, { int vers = clp->cl_nfs_mod->rpc_vers->number; + if (task->tk_status >= 0) + return 0; + + /* Handle the case of an invalid layout segment */ + if (!pnfs_is_valid_lseg(lseg)) + return -NFS4ERR_RESET_TO_PNFS; + switch (vers) { case 3: return ff_layout_async_handle_error_v3(task, lseg, idx); From 3f22cc6f5ca335de95a49e9a217efaf1034e26ae Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 23 Feb 2017 14:53:39 -0500 Subject: [PATCH 128/357] NFSv4: fix getacl head length estimation commit 6682c14bbe505a8b912c57faf544f866777ee48d upstream. Bitmap and attrlen follow immediately after the op reply header. This was an oversight from commit bf118a342f. Consequences of this are just minor efficiency (extra calls to xdr_shrink_bufhead). Fixes: bf118a342f10 "NFSv4: include bitmap in nfsv4 get acl data" Reviewed-by: Kinglong Mee Signed-off-by: J. Bruce Fields Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index fc89e5ed07ee..c9c4d9855976 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2492,7 +2492,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); - replen = hdr.replen + op_decode_hdr_maxsz + 1; + replen = hdr.replen + op_decode_hdr_maxsz; encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr); xdr_inline_pages(&req->rq_rcv_buf, replen << 2, From d78f93384da1e21825144fdfac5dc0bc18db21bd Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 23 Feb 2017 14:54:21 -0500 Subject: [PATCH 129/357] NFSv4: fix getacl ERANGE for some ACL buffer sizes commit ed92d8c137b7794c2c2aa14479298b9885967607 upstream. We're not taking into account that the space needed for the (variable length) attr bitmap, with the result that we'd sometimes get a spurious ERANGE when the ACL data got close to the end of a page. Just add in an extra page to make sure. Signed-off-by: Weston Andros Adamson Signed-off-by: J. Bruce Fields Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 123105f4be53..609840de31d3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4990,7 +4990,7 @@ out: */ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) { - struct page *pages[NFS4ACL_MAXPAGES] = {NULL, }; + struct page *pages[NFS4ACL_MAXPAGES + 1] = {NULL, }; struct nfs_getaclargs args = { .fh = NFS_FH(inode), .acl_pages = pages, @@ -5004,13 +5004,9 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu .rpc_argp = &args, .rpc_resp = &res, }; - unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE); + unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1; int ret = -ENOMEM, i; - /* As long as we're doing a round trip to the server anyway, - * let's be prepared for a page of acl data. */ - if (npages == 0) - npages = 1; if (npages > ARRAY_SIZE(pages)) return -ERANGE; From ec160ad2acaad4a7de6eab7338ef5e3b2b19f907 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Mon, 19 Dec 2016 20:10:48 +0800 Subject: [PATCH 130/357] f2fs: fix a problem of using memory after free commit 7855eba4d6102f811b6dd142d6c749f53b591fa3 upstream. This patch fix a problem of using memory after free in function __try_merge_extent_node. Fixes: 0f825ee6e873 ("f2fs: add new interfaces for extent tree") Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/extent_cache.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 2b06d4fcd954..7b32ce979fe1 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -352,11 +352,12 @@ static struct extent_node *__try_merge_extent_node(struct inode *inode, } if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) { - if (en) - __release_extent_node(sbi, et, prev_ex); next_ex->ei.fofs = ei->fofs; next_ex->ei.blk = ei->blk; next_ex->ei.len += ei->len; + if (en) + __release_extent_node(sbi, et, prev_ex); + en = next_ex; } From d00d1b71d98468ad6fbee590705c27fc1241f96e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 14 Feb 2017 09:54:37 -0800 Subject: [PATCH 131/357] f2fs: fix multiple f2fs_add_link() calls having same name commit 88c5c13a5027b36d914536fdba23f069d7067204 upstream. It turns out a stakable filesystem like sdcardfs in AOSP can trigger multiple vfs_create() to lower filesystem. In that case, f2fs will add multiple dentries having same name which breaks filesystem consistency. Until upper layer fixes, let's work around by f2fs, which shows actually not much performance regression. Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/dir.c | 34 +++++++++++++++++++++++++++++----- fs/f2fs/f2fs.h | 1 + 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 369f4513be37..ebdc90fc71b7 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -207,9 +207,13 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, f2fs_put_page(dentry_page, 0); } - if (!de && room && F2FS_I(dir)->chash != namehash) { - F2FS_I(dir)->chash = namehash; - F2FS_I(dir)->clevel = level; + /* This is to increase the speed of f2fs_create */ + if (!de && room) { + F2FS_I(dir)->task = current; + if (F2FS_I(dir)->chash != namehash) { + F2FS_I(dir)->chash = namehash; + F2FS_I(dir)->clevel = level; + } } return de; @@ -643,14 +647,34 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *inode, nid_t ino, umode_t mode) { struct fscrypt_name fname; + struct page *page = NULL; + struct f2fs_dir_entry *de = NULL; int err; err = fscrypt_setup_filename(dir, name, 0, &fname); if (err) return err; - err = __f2fs_do_add_link(dir, &fname, inode, ino, mode); - + /* + * An immature stakable filesystem shows a race condition between lookup + * and create. If we have same task when doing lookup and create, it's + * definitely fine as expected by VFS normally. Otherwise, let's just + * verify on-disk dentry one more time, which guarantees filesystem + * consistency more. + */ + if (current != F2FS_I(dir)->task) { + de = __f2fs_find_entry(dir, &fname, &page); + F2FS_I(dir)->task = NULL; + } + if (de) { + f2fs_dentry_kunmap(dir, page); + f2fs_put_page(page, 0); + err = -EEXIST; + } else if (IS_ERR(page)) { + err = PTR_ERR(page); + } else { + err = __f2fs_do_add_link(dir, &fname, inode, ino, mode); + } fscrypt_free_filename(&fname); return err; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 506af456412f..ecc3fd0ca6bb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -431,6 +431,7 @@ struct f2fs_inode_info { atomic_t dirty_pages; /* # of dirty pages */ f2fs_hash_t chash; /* hash value of given file name */ unsigned int clevel; /* maximum level of given file name */ + struct task_struct *task; /* lookup and create consistency */ nid_t i_xattr_nid; /* node id that contains xattrs */ unsigned long long xattr_ver; /* cp version of xattr modification */ loff_t last_disk_size; /* lastly written file size */ From 4992ba2840bd40a85887a1a0e5d69f688f58f9dd Mon Sep 17 00:00:00 2001 From: Hou Pengyang Date: Thu, 16 Feb 2017 12:34:31 +0000 Subject: [PATCH 132/357] f2fs: add ovp valid_blocks check for bg gc victim to fg_gc commit e93b9865251a0503d83fd570e7d5a7c8bc351715 upstream. For foreground gc, greedy algorithm should be adapted, which makes this formula work well: (2 * (100 / config.overprovision + 1) + 6) But currently, we fg_gc have a prior to select bg_gc victim segments to gc first, these victims are selected by cost-benefit algorithm, we can't guarantee such segments have the small valid blocks, which may destroy the f2fs rule, on the worstest case, would consume all the free segments. This patch fix this by add a filter in check_bg_victims, if segment's has # of valid blocks over overprovision ratio, skip such segments. Signed-off-by: Hou Pengyang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/f2fs.h | 3 +++ fs/f2fs/gc.c | 22 ++++++++++++++++++++-- fs/f2fs/segment.h | 9 +++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ecc3fd0ca6bb..3a1640be7ffc 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -834,6 +834,9 @@ struct f2fs_sb_info { struct f2fs_gc_kthread *gc_thread; /* GC thread */ unsigned int cur_victim_sec; /* current victim section num */ + /* threshold for converting bg victims for fg */ + u64 fggc_threshold; + /* maximum # of trials to find a victim segment for SSR and GC */ unsigned int max_victim_search; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 6f14ee923acd..34a69e7ed90b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -166,7 +166,8 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, p->ofs_unit = sbi->segs_per_sec; } - if (p->max_search > sbi->max_victim_search) + /* we need to check every dirty segments in the FG_GC case */ + if (gc_type != FG_GC && p->max_search > sbi->max_victim_search) p->max_search = sbi->max_victim_search; p->offset = sbi->last_victim[p->gc_mode]; @@ -199,6 +200,10 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) { if (sec_usage_check(sbi, secno)) continue; + + if (no_fggc_candidate(sbi, secno)) + continue; + clear_bit(secno, dirty_i->victim_secmap); return secno * sbi->segs_per_sec; } @@ -322,13 +327,15 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, nsearched++; } - secno = GET_SECNO(sbi, segno); if (sec_usage_check(sbi, secno)) goto next; if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) goto next; + if (gc_type == FG_GC && p.alloc_mode == LFS && + no_fggc_candidate(sbi, secno)) + goto next; cost = get_gc_cost(sbi, segno, &p); @@ -972,5 +979,16 @@ stop: void build_gc_manager(struct f2fs_sb_info *sbi) { + u64 main_count, resv_count, ovp_count, blocks_per_sec; + DIRTY_I(sbi)->v_ops = &default_v_ops; + + /* threshold of # of valid blocks in a section for victims of FG_GC */ + main_count = SM_I(sbi)->main_segments << sbi->log_blocks_per_seg; + resv_count = SM_I(sbi)->reserved_segments << sbi->log_blocks_per_seg; + ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg; + blocks_per_sec = sbi->blocks_per_seg * sbi->segs_per_sec; + + sbi->fggc_threshold = div_u64((main_count - ovp_count) * blocks_per_sec, + (main_count - resv_count)); } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index fecb856ad874..b164f8339281 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -688,6 +688,15 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) - (base + 1) + type; } +static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi, + unsigned int secno) +{ + if (get_valid_blocks(sbi, secno, sbi->segs_per_sec) >= + sbi->fggc_threshold) + return true; + return false; +} + static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno) { if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno)) From 8c53efc399565ef4be8945c9bf45c94153699221 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 27 Feb 2017 11:57:11 -0800 Subject: [PATCH 133/357] f2fs: avoid to issue redundant discard commands commit 8b107f5b97772c7c0c218302e9a4d15b4edf50b4 upstream. If segs_per_sec is over 1 like under SMR, previously f2fs issues discard commands redundantly on the same section, since we didn't move end position for the previous discard command. E.g., start end | | prefree_bitmap = [01111100111100] And, after issue discard for this section, end start | | prefree_bitmap = [01111100111100] Select this section again by searching from (end + 1), start end | | prefree_bitmap = [01111100111100] Fixes: 36abef4e796d38 ("f2fs: introduce mode=lfs mount option") Cc: Damien Le Moal Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/segment.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fc886f008449..a7943f861d68 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -813,6 +813,8 @@ next: start = start_segno + sbi->segs_per_sec; if (start < end) goto next; + else + end = start - 1; } mutex_unlock(&dirty_i->seglist_lock); From 616f5ef61338a53e997eebbe2c13b21c40df9b86 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 23 Jan 2017 11:41:46 +0100 Subject: [PATCH 134/357] rtc: sun6i: Disable the build as a module commit 3753941475ae6501dcd1e41832bd0e6c35247d6a upstream. Since we have to provide the clock very early on, the RTC driver cannot be built as a module. Make sure that won't happen. Signed-off-by: Maxime Ripard Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/Kconfig | 2 +- drivers/rtc/rtc-sun6i.c | 7 +------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index e859d148aba9..0723c97ebea3 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -1432,7 +1432,7 @@ config RTC_DRV_SUN4V based RTC on SUN4V systems. config RTC_DRV_SUN6I - tristate "Allwinner A31 RTC" + bool "Allwinner A31 RTC" default MACH_SUN6I || MACH_SUN8I || COMPILE_TEST depends on ARCH_SUNXI help diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c index c169a2cd4727..a6136cb99851 100644 --- a/drivers/rtc/rtc-sun6i.c +++ b/drivers/rtc/rtc-sun6i.c @@ -439,9 +439,4 @@ static struct platform_driver sun6i_rtc_driver = { .of_match_table = sun6i_rtc_dt_ids, }, }; - -module_platform_driver(sun6i_rtc_driver); - -MODULE_DESCRIPTION("sun6i RTC driver"); -MODULE_AUTHOR("Chen-Yu Tsai "); -MODULE_LICENSE("GPL"); +builtin_platform_driver(sun6i_rtc_driver); From 6aae7ffa33072e6d3a208039ac015c1a0fb51e37 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 23 Jan 2017 11:41:47 +0100 Subject: [PATCH 135/357] rtc: sun6i: Add some locking commit a9422a19ce270a22fc520f2278fb7e80c58be508 upstream. Some registers have a read-modify-write access pattern that are not atomic. Add some locking to prevent from concurrent accesses. Acked-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-sun6i.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c index a6136cb99851..72086f0c0782 100644 --- a/drivers/rtc/rtc-sun6i.c +++ b/drivers/rtc/rtc-sun6i.c @@ -114,13 +114,17 @@ struct sun6i_rtc_dev { void __iomem *base; int irq; unsigned long alarm; + + spinlock_t lock; }; static irqreturn_t sun6i_rtc_alarmirq(int irq, void *id) { struct sun6i_rtc_dev *chip = (struct sun6i_rtc_dev *) id; + irqreturn_t ret = IRQ_NONE; u32 val; + spin_lock(&chip->lock); val = readl(chip->base + SUN6I_ALRM_IRQ_STA); if (val & SUN6I_ALRM_IRQ_STA_CNT_IRQ_PEND) { @@ -129,10 +133,11 @@ static irqreturn_t sun6i_rtc_alarmirq(int irq, void *id) rtc_update_irq(chip->rtc, 1, RTC_AF | RTC_IRQF); - return IRQ_HANDLED; + ret = IRQ_HANDLED; } + spin_unlock(&chip->lock); - return IRQ_NONE; + return ret; } static void sun6i_rtc_setaie(int to, struct sun6i_rtc_dev *chip) @@ -140,6 +145,7 @@ static void sun6i_rtc_setaie(int to, struct sun6i_rtc_dev *chip) u32 alrm_val = 0; u32 alrm_irq_val = 0; u32 alrm_wake_val = 0; + unsigned long flags; if (to) { alrm_val = SUN6I_ALRM_EN_CNT_EN; @@ -150,9 +156,11 @@ static void sun6i_rtc_setaie(int to, struct sun6i_rtc_dev *chip) chip->base + SUN6I_ALRM_IRQ_STA); } + spin_lock_irqsave(&chip->lock, flags); writel(alrm_val, chip->base + SUN6I_ALRM_EN); writel(alrm_irq_val, chip->base + SUN6I_ALRM_IRQ_EN); writel(alrm_wake_val, chip->base + SUN6I_ALARM_CONFIG); + spin_unlock_irqrestore(&chip->lock, flags); } static int sun6i_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm) @@ -191,11 +199,15 @@ static int sun6i_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm) static int sun6i_rtc_getalarm(struct device *dev, struct rtc_wkalrm *wkalrm) { struct sun6i_rtc_dev *chip = dev_get_drvdata(dev); + unsigned long flags; u32 alrm_st; u32 alrm_en; + spin_lock_irqsave(&chip->lock, flags); alrm_en = readl(chip->base + SUN6I_ALRM_IRQ_EN); alrm_st = readl(chip->base + SUN6I_ALRM_IRQ_STA); + spin_unlock_irqrestore(&chip->lock, flags); + wkalrm->enabled = !!(alrm_en & SUN6I_ALRM_EN_CNT_EN); wkalrm->pending = !!(alrm_st & SUN6I_ALRM_EN_CNT_EN); rtc_time_to_tm(chip->alarm, &wkalrm->time); @@ -356,6 +368,7 @@ static int sun6i_rtc_probe(struct platform_device *pdev) chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL); if (!chip) return -ENOMEM; + spin_lock_init(&chip->lock); platform_set_drvdata(pdev, chip); chip->dev = &pdev->dev; From de2aa5b3ee76e97dc3cea33cbd3d1bd443fda7a0 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 23 Jan 2017 11:41:48 +0100 Subject: [PATCH 136/357] rtc: sun6i: Switch to the external oscillator commit fb61bb82cb46a932ef2fc62e1c731c8e7e6640d5 upstream. The RTC is clocked from either an internal, imprecise, oscillator or an external one, which is usually much more accurate. The difference perceived between the time elapsed and the time reported by the RTC is in a 10% scale, which prevents the RTC from being useful at all. Fortunately, the external oscillator is reported to be mandatory in the Allwinner datasheet, so we can just switch to it. Fixes: 9765d2d94309 ("rtc: sun6i: Add sun6i RTC driver") Signed-off-by: Maxime Ripard Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-sun6i.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c index 72086f0c0782..b0d45d23a11b 100644 --- a/drivers/rtc/rtc-sun6i.c +++ b/drivers/rtc/rtc-sun6i.c @@ -37,9 +37,11 @@ /* Control register */ #define SUN6I_LOSC_CTRL 0x0000 +#define SUN6I_LOSC_CTRL_KEY (0x16aa << 16) #define SUN6I_LOSC_CTRL_ALM_DHMS_ACC BIT(9) #define SUN6I_LOSC_CTRL_RTC_HMS_ACC BIT(8) #define SUN6I_LOSC_CTRL_RTC_YMD_ACC BIT(7) +#define SUN6I_LOSC_CTRL_EXT_OSC BIT(0) #define SUN6I_LOSC_CTRL_ACC_MASK GENMASK(9, 7) /* RTC */ @@ -417,6 +419,10 @@ static int sun6i_rtc_probe(struct platform_device *pdev) /* disable alarm wakeup */ writel(0, chip->base + SUN6I_ALARM_CONFIG); + /* switch to the external, more precise, oscillator */ + writel(SUN6I_LOSC_CTRL_KEY | SUN6I_LOSC_CTRL_EXT_OSC, + chip->base + SUN6I_LOSC_CTRL); + chip->rtc = rtc_device_register("rtc-sun6i", &pdev->dev, &sun6i_rtc_ops, THIS_MODULE); if (IS_ERR(chip->rtc)) { From 6f9c02ab9d0d5aefa15953ad3d55912e3a7abb57 Mon Sep 17 00:00:00 2001 From: "colyli@suse.de" Date: Sat, 28 Jan 2017 21:11:49 +0800 Subject: [PATCH 137/357] md linear: fix a race between linear_add() and linear_congested() commit 03a9e24ef2aaa5f1f9837356aed79c860521407a upstream. Recently I receive a bug report that on Linux v3.0 based kerenl, hot add disk to a md linear device causes kernel crash at linear_congested(). From the crash image analysis, I find in linear_congested(), mddev->raid_disks contains value N, but conf->disks[] only has N-1 pointers available. Then a NULL pointer deference crashes the kernel. There is a race between linear_add() and linear_congested(), RCU stuffs used in these two functions cannot avoid the race. Since Linuv v4.0 RCU code is replaced by introducing mddev_suspend(). After checking the upstream code, it seems linear_congested() is not called in generic_make_request() code patch, so mddev_suspend() cannot provent it from being called. The possible race still exists. Here I explain how the race still exists in current code. For a machine has many CPUs, on one CPU, linear_add() is called to add a hard disk to a md linear device; at the same time on other CPU, linear_congested() is called to detect whether this md linear device is congested before issuing an I/O request onto it. Now I use a possible code execution time sequence to demo how the possible race happens, seq linear_add() linear_congested() 0 conf=mddev->private 1 oldconf=mddev->private 2 mddev->raid_disks++ 3 for (i=0; iraid_disks;i++) 4 bdev_get_queue(conf->disks[i].rdev->bdev) 5 mddev->private=newconf In linear_add() mddev->raid_disks is increased in time seq 2, and on another CPU in linear_congested() the for-loop iterates conf->disks[i] by the increased mddev->raid_disks in time seq 3,4. But conf with one more element (which is a pointer to struct dev_info type) to conf->disks[] is not updated yet, accessing its structure member in time seq 4 will cause a NULL pointer deference fault. To fix this race, there are 2 parts of modification in the patch, 1) Add 'int raid_disks' in struct linear_conf, as a copy of mddev->raid_disks. It is initialized in linear_conf(), always being consistent with pointers number of 'struct dev_info disks[]'. When iterating conf->disks[] in linear_congested(), use conf->raid_disks to replace mddev->raid_disks in the for-loop, then NULL pointer deference will not happen again. 2) RCU stuffs are back again, and use kfree_rcu() in linear_add() to free oldconf memory. Because oldconf may be referenced as mddev->private in linear_congested(), kfree_rcu() makes sure that its memory will not be released until no one uses it any more. Also some code comments are added in this patch, to make this modification to be easier understandable. This patch can be applied for kernels since v4.0 after commit: 3be260cc18f8 ("md/linear: remove rcu protections in favour of suspend/resume"). But this bug is reported on Linux v3.0 based kernel, for people who maintain kernels before Linux v4.0, they need to do some back back port to this patch. Changelog: - V3: add 'int raid_disks' in struct linear_conf, and use kfree_rcu() to replace rcu_call() in linear_add(). - v2: add RCU stuffs by suggestion from Shaohua and Neil. - v1: initial effort. Signed-off-by: Coly Li Cc: Shaohua Li Cc: Neil Brown Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/linear.c | 39 ++++++++++++++++++++++++++++++++++----- drivers/md/linear.h | 1 + 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 86f5d435901d..b0c0aef92a37 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -52,18 +52,26 @@ static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector) return conf->disks + lo; } +/* + * In linear_congested() conf->raid_disks is used as a copy of + * mddev->raid_disks to iterate conf->disks[], because conf->raid_disks + * and conf->disks[] are created in linear_conf(), they are always + * consitent with each other, but mddev->raid_disks does not. + */ static int linear_congested(struct mddev *mddev, int bits) { struct linear_conf *conf; int i, ret = 0; - conf = mddev->private; + rcu_read_lock(); + conf = rcu_dereference(mddev->private); - for (i = 0; i < mddev->raid_disks && !ret ; i++) { + for (i = 0; i < conf->raid_disks && !ret ; i++) { struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev); ret |= bdi_congested(&q->backing_dev_info, bits); } + rcu_read_unlock(); return ret; } @@ -143,6 +151,19 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) conf->disks[i-1].end_sector + conf->disks[i].rdev->sectors; + /* + * conf->raid_disks is copy of mddev->raid_disks. The reason to + * keep a copy of mddev->raid_disks in struct linear_conf is, + * mddev->raid_disks may not be consistent with pointers number of + * conf->disks[] when it is updated in linear_add() and used to + * iterate old conf->disks[] earray in linear_congested(). + * Here conf->raid_disks is always consitent with number of + * pointers in conf->disks[] array, and mddev->private is updated + * with rcu_assign_pointer() in linear_addr(), such race can be + * avoided. + */ + conf->raid_disks = raid_disks; + return conf; out: @@ -195,15 +216,23 @@ static int linear_add(struct mddev *mddev, struct md_rdev *rdev) if (!newconf) return -ENOMEM; + /* newconf->raid_disks already keeps a copy of * the increased + * value of mddev->raid_disks, WARN_ONCE() is just used to make + * sure of this. It is possible that oldconf is still referenced + * in linear_congested(), therefore kfree_rcu() is used to free + * oldconf until no one uses it anymore. + */ mddev_suspend(mddev); - oldconf = mddev->private; + oldconf = rcu_dereference(mddev->private); mddev->raid_disks++; - mddev->private = newconf; + WARN_ONCE(mddev->raid_disks != newconf->raid_disks, + "copied raid_disks doesn't match mddev->raid_disks"); + rcu_assign_pointer(mddev->private, newconf); md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); set_capacity(mddev->gendisk, mddev->array_sectors); mddev_resume(mddev); revalidate_disk(mddev->gendisk); - kfree(oldconf); + kfree_rcu(oldconf, rcu); return 0; } diff --git a/drivers/md/linear.h b/drivers/md/linear.h index b685ddd7d7f7..8d392e6098b3 100644 --- a/drivers/md/linear.h +++ b/drivers/md/linear.h @@ -10,6 +10,7 @@ struct linear_conf { struct rcu_head rcu; sector_t array_sectors; + int raid_disks; /* a copy of mddev->raid_disks */ struct dev_info disks[0]; }; #endif From 178d07a0b8c4dc1f6e8a924f04231a5cf9cf86d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sat, 28 Jan 2017 14:31:22 +0100 Subject: [PATCH 138/357] bcma: use (get|put)_device when probing/removing device driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a971df0b9d04674e325346c17de9a895425ca5e1 upstream. This allows tracking device state and e.g. makes devm work as expected. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/bcma/main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index 2c1798e38abd..38688236b3cd 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -633,8 +633,11 @@ static int bcma_device_probe(struct device *dev) drv); int err = 0; + get_device(dev); if (adrv->probe) err = adrv->probe(core); + if (err) + put_device(dev); return err; } @@ -647,6 +650,7 @@ static int bcma_device_remove(struct device *dev) if (adrv->remove) adrv->remove(core); + put_device(dev); return 0; } From 9d82393e658cf7010b90a79d7c065e1766e5480c Mon Sep 17 00:00:00 2001 From: Mark Marshall Date: Thu, 26 Jan 2017 16:18:27 +0100 Subject: [PATCH 139/357] mtd: nand: ifc: Fix location of eccstat registers for IFC V1.0 commit 656441478ed55d960df5f3ccdf5a0f8c61dfd0b3 upstream. The commit 7a654172161c ("mtd/ifc: Add support for IFC controller version 2.0") added support for version 2.0 of the IFC controller. The version 2.0 controller has the ECC status registers at a different location to the previous versions. Correct the fsl_ifc_nand structure so that the ECC status can be read from the correct location for both version 1.0 and 2.0 of the controller. Fixes: 7a654172161c ("mtd/ifc: Add support for IFC controller version 2.0") Signed-off-by: Mark Marshall Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/fsl_ifc_nand.c | 8 +++++++- include/linux/fsl_ifc.h | 8 ++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c index 0a177b1bfe3e..d1570f512f0b 100644 --- a/drivers/mtd/nand/fsl_ifc_nand.c +++ b/drivers/mtd/nand/fsl_ifc_nand.c @@ -258,9 +258,15 @@ static void fsl_ifc_run_command(struct mtd_info *mtd) int bufnum = nctrl->page & priv->bufnum_mask; int sector = bufnum * chip->ecc.steps; int sector_end = sector + chip->ecc.steps - 1; + __be32 *eccstat_regs; + + if (ctrl->version >= FSL_IFC_VERSION_2_0_0) + eccstat_regs = ifc->ifc_nand.v2_nand_eccstat; + else + eccstat_regs = ifc->ifc_nand.v1_nand_eccstat; for (i = sector / 4; i <= sector_end / 4; i++) - eccstat[i] = ifc_in32(&ifc->ifc_nand.nand_eccstat[i]); + eccstat[i] = ifc_in32(&eccstat_regs[i]); for (i = sector; i <= sector_end; i++) { errors = check_read_ecc(mtd, ctrl, eccstat, i); diff --git a/include/linux/fsl_ifc.h b/include/linux/fsl_ifc.h index 3f9778cbc79d..c332f0a45607 100644 --- a/include/linux/fsl_ifc.h +++ b/include/linux/fsl_ifc.h @@ -733,8 +733,12 @@ struct fsl_ifc_nand { __be32 nand_erattr1; u32 res19[0x10]; __be32 nand_fsr; - u32 res20[0x3]; - __be32 nand_eccstat[6]; + u32 res20; + /* The V1 nand_eccstat is actually 4 words that overlaps the + * V2 nand_eccstat. + */ + __be32 v1_nand_eccstat[2]; + __be32 v2_nand_eccstat[6]; u32 res21[0x1c]; __be32 nanndcr; u32 res22[0x2]; From 788d81d4e5d43ffc2dbcd4513dfec2808f5ff616 Mon Sep 17 00:00:00 2001 From: Magnus Lilja Date: Wed, 21 Dec 2016 22:13:58 +0100 Subject: [PATCH 140/357] dmaengine: ipu: Make sure the interrupt routine checks all interrupts. commit adee40b265d7568296e218f079f478197ffa15bf upstream. Commit 3d8cc00073d6 ("dmaengine: ipu: Consolidate duplicated irq handlers") consolidated the two interrupts routines into one, but the remaining interrupt routine only checks the status of the error interrupts, not the normal interrupts. This patch fixes that problem (tested on i.MX31 PDK board). Fixes: 3d8cc00073d6 ("dmaengine: ipu: Consolidate duplicated irq handlers") Cc: Vinod Koul Signed-off-by: Magnus Lilja Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/ipu/ipu_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/ipu/ipu_irq.c b/drivers/dma/ipu/ipu_irq.c index dd184b50e5b4..284627806b88 100644 --- a/drivers/dma/ipu/ipu_irq.c +++ b/drivers/dma/ipu/ipu_irq.c @@ -272,7 +272,7 @@ static void ipu_irq_handler(struct irq_desc *desc) u32 status; int i, line; - for (i = IPU_IRQ_NR_FN_BANKS; i < IPU_IRQ_NR_BANKS; i++) { + for (i = 0; i < IPU_IRQ_NR_BANKS; i++) { struct ipu_irq_bank *bank = irq_bank + i; raw_spin_lock(&bank_lock); From ec3bc2c5ed576c27e4c89a4fc1654755a0fe71bb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Feb 2017 16:59:46 -0500 Subject: [PATCH 141/357] xprtrdma: Fix Read chunk padding commit 24abdf1be15c478e2821d6fc903a4a4440beff02 upstream. When pad optimization is disabled, rpcrdma_convert_iovs still does not add explicit XDR round-up padding to a Read chunk. Commit 677eb17e94ed ("xprtrdma: Fix XDR tail buffer marshalling") incorrectly short-circuited the test for whether round-up padding is needed that appears later in rpcrdma_convert_iovs. However, if this is indeed a regular Read chunk (and not a Position-Zero Read chunk), the tail iovec _always_ contains the chunk's padding, and never anything else. So, it's easy to just skip the tail when padding optimization is enabled, and add the tail in a subsequent Read chunk segment, if disabled. Fixes: 677eb17e94ed ("xprtrdma: Fix XDR tail buffer marshalling") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtrdma/rpc_rdma.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index d987c2d3dd6e..e5a8c22173c2 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -226,8 +226,10 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, if (len && n == RPCRDMA_MAX_SEGS) goto out_overflow; - /* When encoding the read list, the tail is always sent inline */ - if (type == rpcrdma_readch) + /* When encoding a Read chunk, the tail iovec contains an + * XDR pad and may be omitted. + */ + if (type == rpcrdma_readch && xprt_rdma_pad_optimize) return n; /* When encoding the Write list, some servers need to see an extra @@ -238,10 +240,6 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, return n; if (xdrbuf->tail[0].iov_len) { - /* the rpcrdma protocol allows us to omit any trailing - * xdr pad bytes, saving the server an RDMA operation. */ - if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize) - return n; n = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, n); if (n == RPCRDMA_MAX_SEGS) goto out_overflow; From fab6c2caa48f892c4f3446aea9e253ca8a6187a6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Feb 2017 16:59:54 -0500 Subject: [PATCH 142/357] xprtrdma: Per-connection pad optimization commit b5f0afbea4f2ea52c613ac2b06cb6de2ea18cb6d upstream. Pad optimization is changed by echoing into /proc/sys/sunrpc/rdma_pad_optimize. This is a global setting, affecting all RPC-over-RDMA connections to all servers. The marshaling code picks up that value and uses it for decisions about how to construct each RPC-over-RDMA frame. Having it change suddenly in mid-operation can result in unexpected failures. And some servers a client mounts might need chunk round-up, while others don't. So instead, copy the pad_optimize setting into each connection's rpcrdma_ia when the transport is created, and use the copy, which can't change during the life of the connection, instead. This also removes a hack: rpcrdma_convert_iovs was using the remote-invalidation-expected flag to predict when it could leave out Write chunk padding. This is because the Linux server handles implicit XDR padding on Write chunks correctly, and only Linux servers can set the connection's remote-invalidation-expected flag. It's more sensible to use the pad optimization setting instead. Fixes: 677eb17e94ed ("xprtrdma: Fix XDR tail buffer marshalling") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtrdma/rpc_rdma.c | 28 ++++++++++++++-------------- net/sunrpc/xprtrdma/verbs.c | 1 + net/sunrpc/xprtrdma/xprt_rdma.h | 1 + 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index e5a8c22173c2..b8f46c186f02 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -186,9 +186,9 @@ rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, int n) */ static int -rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, - enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, - bool reminv_expected) +rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf, + unsigned int pos, enum rpcrdma_chunktype type, + struct rpcrdma_mr_seg *seg) { int len, n, p, page_base; struct page **ppages; @@ -229,14 +229,15 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, /* When encoding a Read chunk, the tail iovec contains an * XDR pad and may be omitted. */ - if (type == rpcrdma_readch && xprt_rdma_pad_optimize) + if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup) return n; - /* When encoding the Write list, some servers need to see an extra - * segment for odd-length Write chunks. The upper layer provides - * space in the tail iovec for this purpose. + /* When encoding a Write chunk, some servers need to see an + * extra segment for non-XDR-aligned Write chunks. The upper + * layer provides space in the tail iovec that may be used + * for this purpose. */ - if (type == rpcrdma_writech && reminv_expected) + if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup) return n; if (xdrbuf->tail[0].iov_len) { @@ -291,7 +292,8 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, if (rtype == rpcrdma_areadch) pos = 0; seg = req->rl_segments; - nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, false); + nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos, + rtype, seg); if (nsegs < 0) return ERR_PTR(nsegs); @@ -353,10 +355,9 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, } seg = req->rl_segments; - nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, + nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, rqst->rq_rcv_buf.head[0].iov_len, - wtype, seg, - r_xprt->rx_ia.ri_reminv_expected); + wtype, seg); if (nsegs < 0) return ERR_PTR(nsegs); @@ -421,8 +422,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, } seg = req->rl_segments; - nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg, - r_xprt->rx_ia.ri_reminv_expected); + nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg); if (nsegs < 0) return ERR_PTR(nsegs); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 8da7f6a4dfc3..39f9e31008bc 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -208,6 +208,7 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, /* Default settings for RPC-over-RDMA Version One */ r_xprt->rx_ia.ri_reminv_expected = false; + r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize; rsize = RPCRDMA_V1_DEF_INLINE_SIZE; wsize = RPCRDMA_V1_DEF_INLINE_SIZE; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index f6ae1b22da47..07db2d1e072d 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -75,6 +75,7 @@ struct rpcrdma_ia { unsigned int ri_max_inline_write; unsigned int ri_max_inline_read; bool ri_reminv_expected; + bool ri_implicit_roundup; struct ib_qp_attr ri_qp_attr; struct ib_qp_init_attr ri_qp_init_attr; }; From 73eea1c4000fd73c138dbf8826bc6e1fa901ae9b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Feb 2017 17:00:02 -0500 Subject: [PATCH 143/357] xprtrdma: Disable pad optimization by default commit c95a3c6b88658bcb8f77f85f31a0b9d9036e8016 upstream. Commit d5440e27d3e5 ("xprtrdma: Enable pad optimization") made the Linux client omit XDR round-up padding in normal Read and Write chunks so that the client doesn't have to register and invalidate 3-byte memory regions that contain no real data. Unfortunately, my cheery 2014 assessment that this optimization "is supported now by both Linux and Solaris servers" was premature. We've found bugs in Solaris in this area since commit d5440e27d3e5 ("xprtrdma: Enable pad optimization") was merged (SYMLINK is the main offender). So for maximum interoperability, I'm disabling this optimization again. If a CM private message is exchanged when connecting, the client recognizes that the server is Linux, and enables the optimization for that connection. Until now the Solaris server bugs did not impact common operations, and were thus largely benign. Soon, less capable devices on Linux NFS/RDMA clients will make use of Read chunks more often, and these Solaris bugs will prevent interoperation in more cases. Fixes: 677eb17e94ed ("xprtrdma: Fix XDR tail buffer marshalling") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtrdma/transport.c | 2 +- net/sunrpc/xprtrdma/verbs.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index ed5e285fd2ea..fa324fe73946 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -67,7 +67,7 @@ unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_inline_write_padding; static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; - int xprt_rdma_pad_optimize = 1; + int xprt_rdma_pad_optimize = 0; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 39f9e31008bc..4d5d5b1c98c4 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -216,6 +216,7 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, pmsg->cp_magic == rpcrdma_cmp_magic && pmsg->cp_version == RPCRDMA_CMP_VERSION) { r_xprt->rx_ia.ri_reminv_expected = true; + r_xprt->rx_ia.ri_implicit_roundup = true; rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); } From 86840a6305149ce9b466f3a841cffdb3c0fd2608 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Feb 2017 17:00:10 -0500 Subject: [PATCH 144/357] xprtrdma: Reduce required number of send SGEs commit 16f906d66cd76fb9895cbc628f447532a7ac1faa upstream. The MAX_SEND_SGES check introduced in commit 655fec6987be ("xprtrdma: Use gathered Send for large inline messages") fails for devices that have a small max_sge. Instead of checking for a large fixed maximum number of SGEs, check for a minimum small number. RPC-over-RDMA will switch to using a Read chunk if an xdr_buf has more pages than can fit in the device's max_sge limit. This is considerably better than failing all together to mount the server. This fix supports devices that have as few as three send SGEs available. Reported-by: Selvin Xavier Reported-by: Devesh Sharma Reported-by: Honggang Li Reported-by: Ram Amrani Fixes: 655fec6987be ("xprtrdma: Use gathered Send for large ...") Tested-by: Honggang Li Tested-by: Ram Amrani Tested-by: Steve Wise Reviewed-by: Parav Pandit Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtrdma/rpc_rdma.c | 26 +++++++++++++++++++++++--- net/sunrpc/xprtrdma/verbs.c | 13 +++++++------ net/sunrpc/xprtrdma/xprt_rdma.h | 2 ++ 3 files changed, 32 insertions(+), 9 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index b8f46c186f02..f57c9f0ab8f9 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -125,14 +125,34 @@ void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt) /* The client can send a request inline as long as the RPCRDMA header * plus the RPC call fit under the transport's inline limit. If the * combined call message size exceeds that limit, the client must use - * the read chunk list for this operation. + * a Read chunk for this operation. + * + * A Read chunk is also required if sending the RPC call inline would + * exceed this device's max_sge limit. */ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) { - struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct xdr_buf *xdr = &rqst->rq_snd_buf; + unsigned int count, remaining, offset; - return rqst->rq_snd_buf.len <= ia->ri_max_inline_write; + if (xdr->len > r_xprt->rx_ia.ri_max_inline_write) + return false; + + if (xdr->page_len) { + remaining = xdr->page_len; + offset = xdr->page_base & ~PAGE_MASK; + count = 0; + while (remaining) { + remaining -= min_t(unsigned int, + PAGE_SIZE - offset, remaining); + offset = 0; + if (++count > r_xprt->rx_ia.ri_max_send_sges) + return false; + } + } + + return true; } /* The client can't know how large the actual reply will be. Thus it diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 4d5d5b1c98c4..e2c37061edbe 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -479,18 +479,19 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia) */ int rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, - struct rpcrdma_create_data_internal *cdata) + struct rpcrdma_create_data_internal *cdata) { struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private; + unsigned int max_qp_wr, max_sge; struct ib_cq *sendcq, *recvcq; - unsigned int max_qp_wr; int rc; - if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_SEND_SGES) { - dprintk("RPC: %s: insufficient sge's available\n", - __func__); + max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES); + if (max_sge < RPCRDMA_MIN_SEND_SGES) { + pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge); return -ENOMEM; } + ia->ri_max_send_sges = max_sge - RPCRDMA_MIN_SEND_SGES; if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) { dprintk("RPC: %s: insufficient wqe's available\n", @@ -515,7 +516,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_attr.cap.max_recv_wr = cdata->max_requests; ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */ - ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_SEND_SGES; + ep->rep_attr.cap.max_send_sge = max_sge; ep->rep_attr.cap.max_recv_sge = 1; ep->rep_attr.cap.max_inline_data = 0; ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 07db2d1e072d..48989d5b2883 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -74,6 +74,7 @@ struct rpcrdma_ia { unsigned int ri_max_frmr_depth; unsigned int ri_max_inline_write; unsigned int ri_max_inline_read; + unsigned int ri_max_send_sges; bool ri_reminv_expected; bool ri_implicit_roundup; struct ib_qp_attr ri_qp_attr; @@ -310,6 +311,7 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ * - xdr_buf tail iovec */ enum { + RPCRDMA_MIN_SEND_SGES = 3, RPCRDMA_MAX_SEND_PAGES = PAGE_SIZE + RPCRDMA_MAX_INLINE - 1, RPCRDMA_MAX_PAGE_SGES = (RPCRDMA_MAX_SEND_PAGES >> PAGE_SHIFT) + 1, RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1, From fccb22e7d5df9b899228eb651747265202e5a321 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 22 Nov 2016 14:55:59 +0530 Subject: [PATCH 145/357] powerpc/xmon: Fix data-breakpoint commit c21a493a2b44650707d06741601894329486f2ad upstream. Currently xmon data-breakpoint feature is broken. Whenever there is a watchpoint match occurs, hw_breakpoint_handler will be called by do_break via notifier chains mechanism. If watchpoint is registered by xmon, hw_breakpoint_handler won't find any associated perf_event and returns immediately with NOTIFY_STOP. Similarly, do_break also returns without notifying to xmon. Solve this by returning NOTIFY_DONE when hw_breakpoint_handler does not find any perf_event associated with matched watchpoint, rather than NOTIFY_STOP, which tells the core code to continue calling the other breakpoint handlers including the xmon one. Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/hw_breakpoint.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 03d089b3ed72..469d86d1c2a5 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -228,8 +228,10 @@ int hw_breakpoint_handler(struct die_args *args) rcu_read_lock(); bp = __this_cpu_read(bp_per_reg); - if (!bp) + if (!bp) { + rc = NOTIFY_DONE; goto out; + } info = counter_arch_bp(bp); /* From 3552f917154a3472d3095988f98289ebdda5a8d3 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 7 Feb 2017 00:09:27 +0530 Subject: [PATCH 146/357] powerpc/mm: Add MMU_FTR_KERNEL_RO to possible feature mask commit a5ecdad4847897007399d7a14c9109b65ce4c9b7 upstream. Without this we will always find the feature disabled. Fixes: 984d7a1ec6 ("powerpc/mm: Fixup kernel read only mapping") Signed-off-by: Aneesh Kumar K.V Acked-by: Balbir Singh Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/mmu.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index a244e09d2d88..5d22b0bef3d8 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -136,6 +136,7 @@ enum { MMU_FTR_NO_SLBIE_B | MMU_FTR_16M_PAGE | MMU_FTR_TLBIEL | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_CI_LARGE_PAGE | MMU_FTR_1T_SEGMENT | MMU_FTR_TLBIE_CROP_VA | + MMU_FTR_KERNEL_RO | #ifdef CONFIG_PPC_RADIX_MMU MMU_FTR_TYPE_RADIX | #endif From 075be78c83989ba4e51ab05121bcb8f8b9c6ef6e Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 22 Feb 2017 10:42:02 +0530 Subject: [PATCH 147/357] powerpc/mm/hash: Always clear UPRT and Host Radix bits when setting up CPU commit fda2d27db6eae5c2468f9e4657539b72bbc238bb upstream. We will set LPCR with correct value for radix during int. This make sure we start with a sanitized value of LPCR. In case of kexec, cpus can have LPCR value based on the previous translation mode we were running. Fixes: fe036a0605d60 ("powerpc/64/kexec: Fix MMU cleanup on radix") Acked-by: Michael Neuling Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/cpu_setup_power.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 37c027ca83b2..7803756998e2 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -100,6 +100,8 @@ _GLOBAL(__setup_cpu_power9) mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE) or r3, r3, r4 + LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) + andc r3, r3, r4 bl __init_LPCR bl __init_HFSCR bl __init_tlb_power9 @@ -120,6 +122,8 @@ _GLOBAL(__restore_cpu_power9) mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE) or r3, r3, r4 + LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) + andc r3, r3, r4 bl __init_LPCR bl __init_HFSCR bl __init_tlb_power9 From 8a2307c7c0189a63810c23260be1f76346ad949a Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 15 Dec 2016 12:27:21 +0100 Subject: [PATCH 148/357] MIPS: IP22: Reformat inline assembler code to modern standards. commit f9f1c8db1c37253805eaa32265e1e1af3ae7d0a4 upstream. Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/mm/sc-ip22.c | 43 ++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/arch/mips/mm/sc-ip22.c b/arch/mips/mm/sc-ip22.c index 026cb59a914d..610f44b4d764 100644 --- a/arch/mips/mm/sc-ip22.c +++ b/arch/mips/mm/sc-ip22.c @@ -31,26 +31,29 @@ static inline void indy_sc_wipe(unsigned long first, unsigned long last) unsigned long tmp; __asm__ __volatile__( - ".set\tpush\t\t\t# indy_sc_wipe\n\t" - ".set\tnoreorder\n\t" - ".set\tmips3\n\t" - ".set\tnoat\n\t" - "mfc0\t%2, $12\n\t" - "li\t$1, 0x80\t\t\t# Go 64 bit\n\t" - "mtc0\t$1, $12\n\t" - - "dli\t$1, 0x9000000080000000\n\t" - "or\t%0, $1\t\t\t# first line to flush\n\t" - "or\t%1, $1\t\t\t# last line to flush\n\t" - ".set\tat\n\t" - - "1:\tsw\t$0, 0(%0)\n\t" - "bne\t%0, %1, 1b\n\t" - " daddu\t%0, 32\n\t" - - "mtc0\t%2, $12\t\t\t# Back to 32 bit\n\t" - "nop; nop; nop; nop;\n\t" - ".set\tpop" + " .set push # indy_sc_wipe \n" + " .set noreorder \n" + " .set mips3 \n" + " .set noat \n" + " mfc0 %2, $12 \n" + " li $1, 0x80 # Go 64 bit \n" + " mtc0 $1, $12 \n" + " \n" + " dli $1, 0x9000000080000000 \n" + " or %0, $1 # first line to flush \n" + " or %1, $1 # last line to flush \n" + " .set at \n" + " \n" + "1: sw $0, 0(%0) \n" + " bne %0, %1, 1b \n" + " daddu %0, 32 \n" + " \n" + " mtc0 %2, $12 # Back to 32 bit \n" + " nop # pipeline hazard \n" + " nop \n" + " nop \n" + " nop \n" + " .set pop \n" : "=r" (first), "=r" (last), "=&r" (tmp) : "0" (first), "1" (last)); } From aae02d1aafe7c1989a3f10ff1f6ca5eed855426e Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 15 Dec 2016 12:39:22 +0100 Subject: [PATCH 149/357] MIPS: IP22: Fix build error due to binutils 2.25 uselessnes. commit ae2f5e5ed04a17c1aa1f0a3714c725e12c21d2a9 upstream. Fix the following build error with binutils 2.25. CC arch/mips/mm/sc-ip22.o {standard input}: Assembler messages: {standard input}:132: Error: number (0x9000000080000000) larger than 32 bits {standard input}:159: Error: number (0x9000000080000000) larger than 32 bits {standard input}:200: Error: number (0x9000000080000000) larger than 32 bits scripts/Makefile.build:293: recipe for target 'arch/mips/mm/sc-ip22.o' failed make[1]: *** [arch/mips/mm/sc-ip22.o] Error 1 MIPS has used .set mips3 to temporarily switch the assembler to 64 bit mode in 64 bit kernels virtually forever. Binutils 2.25 broke this behavious partially by happily accepting 64 bit instructions in .set mips3 mode but puking on 64 bit constants when generating 32 bit ELF. Binutils 2.26 restored the old behaviour again. Fix build with binutils 2.25 by open coding the offending dli $1, 0x9000000080000000 as li $1, 0x9000 dsll $1, $1, 48 which is ugly be the only thing that will build on all binutils vintages. Signed-off-by: Ralf Baechle Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/mips/mm/sc-ip22.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/mips/mm/sc-ip22.c b/arch/mips/mm/sc-ip22.c index 610f44b4d764..f293a97cb885 100644 --- a/arch/mips/mm/sc-ip22.c +++ b/arch/mips/mm/sc-ip22.c @@ -39,7 +39,18 @@ static inline void indy_sc_wipe(unsigned long first, unsigned long last) " li $1, 0x80 # Go 64 bit \n" " mtc0 $1, $12 \n" " \n" - " dli $1, 0x9000000080000000 \n" + " # \n" + " # Open code a dli $1, 0x9000000080000000 \n" + " # \n" + " # Required because binutils 2.25 will happily accept \n" + " # 64 bit instructions in .set mips3 mode but puke on \n" + " # 64 bit constants when generating 32 bit ELF \n" + " # \n" + " lui $1,0x9000 \n" + " dsll $1,$1,0x10 \n" + " ori $1,$1,0x8000 \n" + " dsll $1,$1,0x10 \n" + " \n" " or %0, $1 # first line to flush \n" " or %1, $1 # last line to flush \n" " .set at \n" From 27ab5414b980b19ba127cb518546d628f40bd3dc Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:25 -0800 Subject: [PATCH 150/357] scsi: lpfc: Correct WQ creation for pagesize commit 8ea73db486cda442f0671f4bc9c03a76be398a28 upstream. Correct WQ creation for pagesize The driver was calculating the adapter command pagesize indicator from the system pagesize. However, the buffers the driver allocates are only one size (SLI4_PAGE_SIZE), so no calculation was necessary. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Cc: Mauricio Faria de Oliveira Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_hw4.h | 2 ++ drivers/scsi/lpfc/lpfc_sli.c | 9 +++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index ee8022737591..55faa94637a9 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -1185,6 +1185,7 @@ struct lpfc_mbx_wq_create { #define lpfc_mbx_wq_create_page_size_SHIFT 0 #define lpfc_mbx_wq_create_page_size_MASK 0x000000FF #define lpfc_mbx_wq_create_page_size_WORD word1 +#define LPFC_WQ_PAGE_SIZE_4096 0x1 #define lpfc_mbx_wq_create_wqe_size_SHIFT 8 #define lpfc_mbx_wq_create_wqe_size_MASK 0x0000000F #define lpfc_mbx_wq_create_wqe_size_WORD word1 @@ -1256,6 +1257,7 @@ struct rq_context { #define lpfc_rq_context_page_size_SHIFT 0 /* Version 1 Only */ #define lpfc_rq_context_page_size_MASK 0x000000FF #define lpfc_rq_context_page_size_WORD word0 +#define LPFC_RQ_PAGE_SIZE_4096 0x1 uint32_t reserved1; uint32_t word2; #define lpfc_rq_context_cq_id_SHIFT 16 diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index f4f77c5b0c83..49b4c798de18 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -13678,7 +13678,7 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, LPFC_WQ_WQE_SIZE_128); bf_set(lpfc_mbx_wq_create_page_size, &wq_create->u.request_1, - (PAGE_SIZE/SLI4_PAGE_SIZE)); + LPFC_WQ_PAGE_SIZE_4096); page = wq_create->u.request_1.page; break; } @@ -13704,8 +13704,9 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, LPFC_WQ_WQE_SIZE_128); break; } - bf_set(lpfc_mbx_wq_create_page_size, &wq_create->u.request_1, - (PAGE_SIZE/SLI4_PAGE_SIZE)); + bf_set(lpfc_mbx_wq_create_page_size, + &wq_create->u.request_1, + LPFC_WQ_PAGE_SIZE_4096); page = wq_create->u.request_1.page; break; default: @@ -13891,7 +13892,7 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, LPFC_RQE_SIZE_8); bf_set(lpfc_rq_context_page_size, &rq_create->u.request.context, - (PAGE_SIZE/SLI4_PAGE_SIZE)); + LPFC_RQ_PAGE_SIZE_4096); } else { switch (hrq->entry_count) { default: From dc8470f3c831c93ffcda17612e8e58a73210b88b Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 19 Jan 2017 11:21:29 +0800 Subject: [PATCH 151/357] ceph: update readpages osd request according to size of pages commit d641df819db8b80198fd85d9de91137e8a823b07 upstream. add_to_page_cache_lru() can fails, so the actual pages to read can be smaller than the initial size of osd request. We need to update osd request size in that case. Signed-off-by: Yan, Zheng Reviewed-by: Jeff Layton Signed-off-by: Greg Kroah-Hartman --- fs/ceph/addr.c | 1 + net/ceph/osd_client.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index ef3ebd780aff..1e643c718917 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -363,6 +363,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) nr_pages = i; if (nr_pages > 0) { len = nr_pages << PAGE_SHIFT; + osd_req_op_extent_update(req, 0, len); break; } goto out_pages; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index e6ae15bc41b7..0ffeb60cfe67 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -672,7 +672,8 @@ void osd_req_op_extent_update(struct ceph_osd_request *osd_req, BUG_ON(length > previous); op->extent.length = length; - op->indata_len -= previous - length; + if (op->op == CEPH_OSD_OP_WRITE || op->op == CEPH_OSD_OP_WRITEFULL) + op->indata_len -= previous - length; } EXPORT_SYMBOL(osd_req_op_extent_update); From 5f7ff59d067e1888fec91fccefdd84f3987b605e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 16 Jan 2017 18:24:56 +0100 Subject: [PATCH 152/357] netfilter: conntrack: remove GC_MAX_EVICTS break commit 524b698db06b9b6da7192e749f637904e2f62d7b upstream. Instead of breaking loop and instant resched, don't bother checking this in first place (the loop calls cond_resched for every bucket anyway). Suggested-by: Nicolas Dichtel Signed-off-by: Florian Westphal Acked-by: Nicolas Dichtel Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_conntrack_core.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0f87e5d21be7..f8b142c66e6a 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -88,8 +88,6 @@ static __read_mostly bool nf_conntrack_locks_all; #define GC_MAX_BUCKETS_DIV 64u /* upper bound of scan intervals */ #define GC_INTERVAL_MAX (2 * HZ) -/* maximum conntracks to evict per gc run */ -#define GC_MAX_EVICTS 256u static struct conntrack_gc_work conntrack_gc_work; @@ -979,8 +977,7 @@ static void gc_worker(struct work_struct *work) */ rcu_read_unlock(); cond_resched_rcu_qs(); - } while (++buckets < goal && - expired_count < GC_MAX_EVICTS); + } while (++buckets < goal); if (gc_work->exiting) return; @@ -1005,7 +1002,7 @@ static void gc_worker(struct work_struct *work) * In case we have lots of evictions next scan is done immediately. */ ratio = scanned ? expired_count * 100 / scanned : 0; - if (ratio >= 90 || expired_count == GC_MAX_EVICTS) { + if (ratio >= 90) { gc_work->next_gc_run = 0; next_run = 0; } else if (expired_count) { From 371d0342a39710e45e6f7c316ca7786b3e34d90f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Jan 2017 02:01:22 +0100 Subject: [PATCH 153/357] netfilter: conntrack: refine gc worker heuristics, redux commit e5072053b09642b8ff417d47da05b84720aea3ee upstream. This further refines the changes made to conntrack gc_worker in commit e0df8cae6c16 ("netfilter: conntrack: refine gc worker heuristics"). The main idea of that change was to reduce the scan interval when evictions take place. However, on the reporters' setup, there are 1-2 million conntrack entries in total and roughly 8k new (and closing) connections per second. In this case we'll always evict at least one entry per gc cycle and scan interval is always at 1 jiffy because of this test: } else if (expired_count) { gc_work->next_gc_run /= 2U; next_run = msecs_to_jiffies(1); being true almost all the time. Given we scan ~10k entries per run its clearly wrong to reduce interval based on nonzero eviction count, it will only waste cpu cycles since a vast majorities of conntracks are not timed out. Thus only look at the ratio (scanned entries vs. evicted entries) to make a decision on whether to reduce or not. Because evictor is supposed to only kick in when system turns idle after a busy period, pick a high ratio -- this makes it 50%. We thus keep the idea of increasing scan rate when its likely that table contains many expired entries. In order to not let timed-out entries hang around for too long (important when using event logging, in which case we want to timely destroy events), we now scan the full table within at most GC_MAX_SCAN_JIFFIES (16 seconds) even in worst-case scenario where all timed-out entries sit in same slot. I tested this with a vm under synflood (with sysctl net.netfilter.nf_conntrack_tcp_timeout_syn_recv=3). While flood is ongoing, interval now stays at its max rate (GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV -> 125ms). With feedback from Nicolas Dichtel. Reported-by: Denys Fedoryshchenko Cc: Nicolas Dichtel Fixes: b87a2f9199ea82eaadc ("netfilter: conntrack: add gc worker to remove timed-out entries") Signed-off-by: Florian Westphal Tested-by: Nicolas Dichtel Acked-by: Nicolas Dichtel Tested-by: Denys Fedoryshchenko Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_conntrack_core.c | 39 ++++++++++++++++--------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f8b142c66e6a..6bd150882ba4 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -85,9 +85,11 @@ static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; /* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */ -#define GC_MAX_BUCKETS_DIV 64u -/* upper bound of scan intervals */ -#define GC_INTERVAL_MAX (2 * HZ) +#define GC_MAX_BUCKETS_DIV 128u +/* upper bound of full table scan */ +#define GC_MAX_SCAN_JIFFIES (16u * HZ) +/* desired ratio of entries found to be expired */ +#define GC_EVICT_RATIO 50u static struct conntrack_gc_work conntrack_gc_work; @@ -936,6 +938,7 @@ static noinline int early_drop(struct net *net, unsigned int _hash) static void gc_worker(struct work_struct *work) { + unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u); unsigned int i, goal, buckets = 0, expired_count = 0; struct conntrack_gc_work *gc_work; unsigned int ratio, scanned = 0; @@ -994,27 +997,25 @@ static void gc_worker(struct work_struct *work) * 1. Minimize time until we notice a stale entry * 2. Maximize scan intervals to not waste cycles * - * Normally, expired_count will be 0, this increases the next_run time - * to priorize 2) above. + * Normally, expire ratio will be close to 0. * - * As soon as a timed-out entry is found, move towards 1) and increase - * the scan frequency. - * In case we have lots of evictions next scan is done immediately. + * As soon as a sizeable fraction of the entries have expired + * increase scan frequency. */ ratio = scanned ? expired_count * 100 / scanned : 0; - if (ratio >= 90) { - gc_work->next_gc_run = 0; - next_run = 0; - } else if (expired_count) { - gc_work->next_gc_run /= 2U; - next_run = msecs_to_jiffies(1); + if (ratio > GC_EVICT_RATIO) { + gc_work->next_gc_run = min_interval; } else { - if (gc_work->next_gc_run < GC_INTERVAL_MAX) - gc_work->next_gc_run += msecs_to_jiffies(1); + unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV; - next_run = gc_work->next_gc_run; + BUILD_BUG_ON((GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV) == 0); + + gc_work->next_gc_run += min_interval; + if (gc_work->next_gc_run > max) + gc_work->next_gc_run = max; } + next_run = gc_work->next_gc_run; gc_work->last_bucket = i; queue_delayed_work(system_long_wq, &gc_work->dwork, next_run); } @@ -1022,7 +1023,7 @@ static void gc_worker(struct work_struct *work) static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) { INIT_DELAYED_WORK(&gc_work->dwork, gc_worker); - gc_work->next_gc_run = GC_INTERVAL_MAX; + gc_work->next_gc_run = HZ; gc_work->exiting = false; } @@ -1915,7 +1916,7 @@ int nf_conntrack_init_start(void) nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); conntrack_gc_work_init(&conntrack_gc_work); - queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, GC_INTERVAL_MAX); + queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ); return 0; From d379ab2707cfa16e774e2698ff5e32a1f7b243b8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 12 Mar 2017 06:42:15 +0100 Subject: [PATCH 154/357] Linux 4.9.14 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 14dc2758345b..5e7706e94622 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 13 +SUBLEVEL = 14 EXTRAVERSION = NAME = Roaring Lionus From e5b9778761558ff3d239ed76925a1a7a734918ea Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Tue, 28 Feb 2017 19:54:40 +0300 Subject: [PATCH 155/357] tty: n_hdlc: get rid of racy n_hdlc.tbuf commit 82f2341c94d270421f383641b7cd670e474db56b upstream. Currently N_HDLC line discipline uses a self-made singly linked list for data buffers and has n_hdlc.tbuf pointer for buffer retransmitting after an error. The commit be10eb7589337e5defbe214dae038a53dd21add8 ("tty: n_hdlc add buffer flushing") introduced racy access to n_hdlc.tbuf. After tx error concurrent flush_tx_queue() and n_hdlc_send_frames() can put one data buffer to tx_free_buf_list twice. That causes double free in n_hdlc_release(). Let's use standard kernel linked list and get rid of n_hdlc.tbuf: in case of tx error put current data buffer after the head of tx_buf_list. Signed-off-by: Alexander Popov Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_hdlc.c | 132 ++++++++++++++++++++++--------------------- 1 file changed, 69 insertions(+), 63 deletions(-) diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index a7fa016f31eb..6d1e2f746ab4 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -114,7 +114,7 @@ #define DEFAULT_TX_BUF_COUNT 3 struct n_hdlc_buf { - struct n_hdlc_buf *link; + struct list_head list_item; int count; char buf[1]; }; @@ -122,8 +122,7 @@ struct n_hdlc_buf { #define N_HDLC_BUF_SIZE (sizeof(struct n_hdlc_buf) + maxframe) struct n_hdlc_buf_list { - struct n_hdlc_buf *head; - struct n_hdlc_buf *tail; + struct list_head list; int count; spinlock_t spinlock; }; @@ -136,7 +135,6 @@ struct n_hdlc_buf_list { * @backup_tty - TTY to use if tty gets closed * @tbusy - reentrancy flag for tx wakeup code * @woke_up - FIXME: describe this field - * @tbuf - currently transmitting tx buffer * @tx_buf_list - list of pending transmit frame buffers * @rx_buf_list - list of received frame buffers * @tx_free_buf_list - list unused transmit frame buffers @@ -149,7 +147,6 @@ struct n_hdlc { struct tty_struct *backup_tty; int tbusy; int woke_up; - struct n_hdlc_buf *tbuf; struct n_hdlc_buf_list tx_buf_list; struct n_hdlc_buf_list rx_buf_list; struct n_hdlc_buf_list tx_free_buf_list; @@ -159,6 +156,8 @@ struct n_hdlc { /* * HDLC buffer list manipulation functions */ +static void n_hdlc_buf_return(struct n_hdlc_buf_list *buf_list, + struct n_hdlc_buf *buf); static void n_hdlc_buf_put(struct n_hdlc_buf_list *list, struct n_hdlc_buf *buf); static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *list); @@ -208,16 +207,9 @@ static void flush_tx_queue(struct tty_struct *tty) { struct n_hdlc *n_hdlc = tty2n_hdlc(tty); struct n_hdlc_buf *buf; - unsigned long flags; while ((buf = n_hdlc_buf_get(&n_hdlc->tx_buf_list))) n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, buf); - spin_lock_irqsave(&n_hdlc->tx_buf_list.spinlock, flags); - if (n_hdlc->tbuf) { - n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, n_hdlc->tbuf); - n_hdlc->tbuf = NULL; - } - spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock, flags); } static struct tty_ldisc_ops n_hdlc_ldisc = { @@ -283,7 +275,6 @@ static void n_hdlc_release(struct n_hdlc *n_hdlc) } else break; } - kfree(n_hdlc->tbuf); kfree(n_hdlc); } /* end of n_hdlc_release() */ @@ -402,13 +393,7 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty) n_hdlc->woke_up = 0; spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock, flags); - /* get current transmit buffer or get new transmit */ - /* buffer from list of pending transmit buffers */ - - tbuf = n_hdlc->tbuf; - if (!tbuf) - tbuf = n_hdlc_buf_get(&n_hdlc->tx_buf_list); - + tbuf = n_hdlc_buf_get(&n_hdlc->tx_buf_list); while (tbuf) { if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d)sending frame %p, count=%d\n", @@ -420,7 +405,7 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty) /* rollback was possible and has been done */ if (actual == -ERESTARTSYS) { - n_hdlc->tbuf = tbuf; + n_hdlc_buf_return(&n_hdlc->tx_buf_list, tbuf); break; } /* if transmit error, throw frame away by */ @@ -435,10 +420,7 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty) /* free current transmit buffer */ n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, tbuf); - - /* this tx buffer is done */ - n_hdlc->tbuf = NULL; - + /* wait up sleeping writers */ wake_up_interruptible(&tty->write_wait); @@ -448,10 +430,12 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty) if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d)frame %p pending\n", __FILE__,__LINE__,tbuf); - - /* buffer not accepted by driver */ - /* set this buffer as pending buffer */ - n_hdlc->tbuf = tbuf; + + /* + * the buffer was not accepted by driver, + * return it back into tx queue + */ + n_hdlc_buf_return(&n_hdlc->tx_buf_list, tbuf); break; } } @@ -749,7 +733,8 @@ static int n_hdlc_tty_ioctl(struct tty_struct *tty, struct file *file, int error = 0; int count; unsigned long flags; - + struct n_hdlc_buf *buf = NULL; + if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d)n_hdlc_tty_ioctl() called %d\n", __FILE__,__LINE__,cmd); @@ -763,8 +748,10 @@ static int n_hdlc_tty_ioctl(struct tty_struct *tty, struct file *file, /* report count of read data available */ /* in next available frame (if any) */ spin_lock_irqsave(&n_hdlc->rx_buf_list.spinlock,flags); - if (n_hdlc->rx_buf_list.head) - count = n_hdlc->rx_buf_list.head->count; + buf = list_first_entry_or_null(&n_hdlc->rx_buf_list.list, + struct n_hdlc_buf, list_item); + if (buf) + count = buf->count; else count = 0; spin_unlock_irqrestore(&n_hdlc->rx_buf_list.spinlock,flags); @@ -776,8 +763,10 @@ static int n_hdlc_tty_ioctl(struct tty_struct *tty, struct file *file, count = tty_chars_in_buffer(tty); /* add size of next output frame in queue */ spin_lock_irqsave(&n_hdlc->tx_buf_list.spinlock,flags); - if (n_hdlc->tx_buf_list.head) - count += n_hdlc->tx_buf_list.head->count; + buf = list_first_entry_or_null(&n_hdlc->tx_buf_list.list, + struct n_hdlc_buf, list_item); + if (buf) + count += buf->count; spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock,flags); error = put_user(count, (int __user *)arg); break; @@ -825,14 +814,14 @@ static unsigned int n_hdlc_tty_poll(struct tty_struct *tty, struct file *filp, poll_wait(filp, &tty->write_wait, wait); /* set bits for operations that won't block */ - if (n_hdlc->rx_buf_list.head) + if (!list_empty(&n_hdlc->rx_buf_list.list)) mask |= POLLIN | POLLRDNORM; /* readable */ if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) mask |= POLLHUP; if (tty_hung_up_p(filp)) mask |= POLLHUP; if (!tty_is_writelocked(tty) && - n_hdlc->tx_free_buf_list.head) + !list_empty(&n_hdlc->tx_free_buf_list.list)) mask |= POLLOUT | POLLWRNORM; /* writable */ } return mask; @@ -856,7 +845,12 @@ static struct n_hdlc *n_hdlc_alloc(void) spin_lock_init(&n_hdlc->tx_free_buf_list.spinlock); spin_lock_init(&n_hdlc->rx_buf_list.spinlock); spin_lock_init(&n_hdlc->tx_buf_list.spinlock); - + + INIT_LIST_HEAD(&n_hdlc->rx_free_buf_list.list); + INIT_LIST_HEAD(&n_hdlc->tx_free_buf_list.list); + INIT_LIST_HEAD(&n_hdlc->rx_buf_list.list); + INIT_LIST_HEAD(&n_hdlc->tx_buf_list.list); + /* allocate free rx buffer list */ for(i=0;ispinlock, flags); + + list_add(&buf->list_item, &buf_list->list); + buf_list->count++; + + spin_unlock_irqrestore(&buf_list->spinlock, flags); +} + /** * n_hdlc_buf_put - add specified HDLC buffer to tail of specified list - * @list - pointer to buffer list + * @buf_list - pointer to buffer list * @buf - pointer to buffer */ -static void n_hdlc_buf_put(struct n_hdlc_buf_list *list, +static void n_hdlc_buf_put(struct n_hdlc_buf_list *buf_list, struct n_hdlc_buf *buf) { unsigned long flags; - spin_lock_irqsave(&list->spinlock,flags); - - buf->link=NULL; - if (list->tail) - list->tail->link = buf; - else - list->head = buf; - list->tail = buf; - (list->count)++; - - spin_unlock_irqrestore(&list->spinlock,flags); - + + spin_lock_irqsave(&buf_list->spinlock, flags); + + list_add_tail(&buf->list_item, &buf_list->list); + buf_list->count++; + + spin_unlock_irqrestore(&buf_list->spinlock, flags); } /* end of n_hdlc_buf_put() */ /** * n_hdlc_buf_get - remove and return an HDLC buffer from list - * @list - pointer to HDLC buffer list + * @buf_list - pointer to HDLC buffer list * * Remove and return an HDLC buffer from the head of the specified HDLC buffer * list. * Returns a pointer to HDLC buffer if available, otherwise %NULL. */ -static struct n_hdlc_buf* n_hdlc_buf_get(struct n_hdlc_buf_list *list) +static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *buf_list) { unsigned long flags; struct n_hdlc_buf *buf; - spin_lock_irqsave(&list->spinlock,flags); - - buf = list->head; + + spin_lock_irqsave(&buf_list->spinlock, flags); + + buf = list_first_entry_or_null(&buf_list->list, + struct n_hdlc_buf, list_item); if (buf) { - list->head = buf->link; - (list->count)--; + list_del(&buf->list_item); + buf_list->count--; } - if (!list->head) - list->tail = NULL; - - spin_unlock_irqrestore(&list->spinlock,flags); + + spin_unlock_irqrestore(&buf_list->spinlock, flags); return buf; - } /* end of n_hdlc_buf_get() */ static char hdlc_banner[] __initdata = From c4c590be494404531f54fa1acf580a08031f5bb9 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Fri, 3 Feb 2017 20:25:00 +0000 Subject: [PATCH 156/357] serial: 8250_pci: Add MKS Tenta SCOM-0800 and SCOM-0801 cards commit 1c9c858e2ff8ae8024a3d75d2ed080063af43754 upstream. The MKS Instruments SCOM-0800 and SCOM-0801 cards (originally by Tenta Technologies) are 3U CompactPCI serial cards with 4 and 8 serial ports, respectively. The first 4 ports are implemented by an OX16PCI954 chip, and the second 4 ports are implemented by an OX16C954 chip on a local bus, bridged by the second PCI function of the OX16PCI954. The ports are jumper-selectable as RS-232 and RS-422/485, and the UARTs use a non-standard oscillator frequency of 20 MHz (base_baud = 1250000). Signed-off-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index b98c1578f45a..4d09bd495a88 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -2688,6 +2688,8 @@ enum pci_board_num_t { pbn_b0_4_1152000_200, pbn_b0_8_1152000_200, + pbn_b0_4_1250000, + pbn_b0_2_1843200, pbn_b0_4_1843200, @@ -2919,6 +2921,13 @@ static struct pciserial_board pci_boards[] = { .uart_offset = 0x200, }, + [pbn_b0_4_1250000] = { + .flags = FL_BASE0, + .num_ports = 4, + .base_baud = 1250000, + .uart_offset = 8, + }, + [pbn_b0_2_1843200] = { .flags = FL_BASE0, .num_ports = 2, @@ -5549,6 +5558,10 @@ static struct pci_device_id serial_pci_tbl[] = { { PCI_DEVICE(0x1c29, 0x1108), .driver_data = pbn_fintek_8 }, { PCI_DEVICE(0x1c29, 0x1112), .driver_data = pbn_fintek_12 }, + /* MKS Tenta SCOM-080x serial cards */ + { PCI_DEVICE(0x1601, 0x0800), .driver_data = pbn_b0_4_1250000 }, + { PCI_DEVICE(0x1601, 0xa801), .driver_data = pbn_b0_4_1250000 }, + /* * These entries match devices with class COMMUNICATION_SERIAL, * COMMUNICATION_MODEM or COMMUNICATION_MULTISERIAL From 035dcc8e87f63b36f3456b39a352f0a1c785b93b Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Thu, 2 Feb 2017 16:39:31 +0100 Subject: [PATCH 157/357] KVM: s390: Disable dirty log retrieval for UCONTROL guests commit e1e8a9624f7ba8ead4f056ff558ed070e86fa747 upstream. User controlled KVM guests do not support the dirty log, as they have no single gmap that we can check for changes. As they have no single gmap, kvm->arch.gmap is NULL and all further referencing to it for dirty checking will result in a NULL dereference. Let's return -EINVAL if a caller tries to sync dirty logs for a UCONTROL guest. Fixes: 15f36eb ("KVM: s390: Add proper dirty bitmap support to S390 kvm.") Signed-off-by: Janosch Frank Reported-by: Martin Schwidefsky Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/kvm-s390.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 47a1de77b18d..5ba494ed18c1 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -442,6 +442,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot; int is_dirty = 0; + if (kvm_is_ucontrol(kvm)) + return -EINVAL; + mutex_lock(&kvm->slots_lock); r = -EINVAL; From 7c3bab189c16a21742172b09979fd84c13698202 Mon Sep 17 00:00:00 2001 From: Chao Peng Date: Tue, 21 Feb 2017 03:50:01 -0500 Subject: [PATCH 158/357] KVM: VMX: use correct vmcs_read/write for guest segment selector/base commit 96794e4ed4d758272c486e1529e431efb7045265 upstream. Guest segment selector is 16 bit field and guest segment base is natural width field. Fix two incorrect invocations accordingly. Without this patch, build fails when aggressive inlining is used with ICC. Signed-off-by: Chao Peng Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 64774f419c72..69b8f8a5ecb0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3693,7 +3693,7 @@ static void fix_rmode_seg(int seg, struct kvm_segment *save) } vmcs_write16(sf->selector, var.selector); - vmcs_write32(sf->base, var.base); + vmcs_writel(sf->base, var.base); vmcs_write32(sf->limit, var.limit); vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var)); } @@ -8202,7 +8202,7 @@ static void kvm_flush_pml_buffers(struct kvm *kvm) static void vmx_dump_sel(char *name, uint32_t sel) { pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n", - name, vmcs_read32(sel), + name, vmcs_read16(sel), vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR), vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR), vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR)); From 519b6cead21ee66ad7825750ce8aefaf64dd6792 Mon Sep 17 00:00:00 2001 From: Dmitry Tunin Date: Thu, 5 Jan 2017 13:19:53 +0300 Subject: [PATCH 159/357] Bluetooth: Add another AR3012 04ca:3018 device commit 441ad62d6c3f131f1dbd7dcdd9cbe3f74dbd8501 upstream. T: Bus=01 Lev=01 Prnt=01 Port=07 Cnt=04 Dev#= 5 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=04ca ProdID=3018 Rev=00.01 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb Signed-off-by: Dmitry Tunin Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/ath3k.c | 2 ++ drivers/bluetooth/btusb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index fadba88745dc..b793853ff05f 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -94,6 +94,7 @@ static const struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x04CA, 0x300f) }, { USB_DEVICE(0x04CA, 0x3010) }, { USB_DEVICE(0x04CA, 0x3014) }, + { USB_DEVICE(0x04CA, 0x3018) }, { USB_DEVICE(0x0930, 0x0219) }, { USB_DEVICE(0x0930, 0x021c) }, { USB_DEVICE(0x0930, 0x0220) }, @@ -162,6 +163,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3014), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x3018), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x021c), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 2f633df9f4e6..dd220fad366c 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -209,6 +209,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3014), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x3018), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x021c), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 }, From 5cec5e32ba5637076aa8f06d61f89cecd877b78e Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 21 Nov 2016 13:37:48 +0100 Subject: [PATCH 160/357] s390/qdio: clear DSCI prior to scanning multiple input queues commit 1e4a382fdc0ba8d1a85b758c0811de3a3631085e upstream. For devices with multiple input queues, tiqdio_call_inq_handlers() iterates over all input queues and clears the device's DSCI during each iteration. If the DSCI is re-armed during one of the later iterations, we therefore do not scan the previous queues again. The re-arming also raises a new adapter interrupt. But its handler does not trigger a rescan for the device, as the DSCI has already been erroneously cleared. This can result in queue stalls on devices with multiple input queues. Fix it by clearing the DSCI just once, prior to scanning the queues. As the code is moved in front of the loop, we also need to access the DSCI directly (ie irq->dsci) instead of going via each queue's parent pointer to the same irq. This is not a functional change, and a follow-up patch will clean up the other users. In practice, this bug only affects CQ-enabled HiperSockets devices, ie. devices with sysfs-attribute "hsuid" set. Setting a hsuid is needed for AF_IUCV socket applications that use HiperSockets communication. Fixes: 104ea556ee7f ("qdio: support asynchronous delivery of storage blocks") Reviewed-by: Ursula Braun Signed-off-by: Julian Wiedmann Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- drivers/s390/cio/qdio_thinint.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c index 5d06253c2a7a..30e9fbbff051 100644 --- a/drivers/s390/cio/qdio_thinint.c +++ b/drivers/s390/cio/qdio_thinint.c @@ -147,11 +147,11 @@ static inline void tiqdio_call_inq_handlers(struct qdio_irq *irq) struct qdio_q *q; int i; - for_each_input_queue(irq, q, i) { - if (!references_shared_dsci(irq) && - has_multiple_inq_on_dsci(irq)) - xchg(q->irq_ptr->dsci, 0); + if (!references_shared_dsci(irq) && + has_multiple_inq_on_dsci(irq)) + xchg(irq->dsci, 0); + for_each_input_queue(irq, q, i) { if (q->u.in.queue_start_poll) { /* skip if polling is enabled or already in work */ if (test_and_set_bit(QDIO_QUEUE_IRQS_DISABLED, From b848102542f5cab63606ad4a45da2e5edc14c571 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Mon, 30 Jan 2017 15:52:14 +0100 Subject: [PATCH 161/357] s390/dcssblk: fix device size calculation in dcssblk_direct_access() commit a63f53e34db8b49675448d03ae324f6c5bc04fe6 upstream. Since commit dd22f551 "block: Change direct_access calling convention", the device size calculation in dcssblk_direct_access() is off-by-one. This results in bdev_direct_access() always returning -ENXIO because the returned value is not page aligned. Fix this by adding 1 to the dev_sz calculation. Fixes: dd22f551 ("block: Change direct_access calling convention") Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- drivers/s390/block/dcssblk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 9d66b4fb174b..415d10a67b7a 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -892,7 +892,7 @@ dcssblk_direct_access (struct block_device *bdev, sector_t secnum, dev_info = bdev->bd_disk->private_data; if (!dev_info) return -ENODEV; - dev_sz = dev_info->end - dev_info->start; + dev_sz = dev_info->end - dev_info->start + 1; offset = secnum * 512; *kaddr = (void *) dev_info->start + offset; *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV); From 91cfcaa6ed46e41c8b44ee09754988b64e075ef2 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Tue, 7 Feb 2017 18:09:14 +0100 Subject: [PATCH 162/357] s390/kdump: Use "LINUX" ELF note name instead of "CORE" commit a4a81d8eebdc1d209d034f62a082a5131e4242b5 upstream. In binutils/libbfd (bfd/elf.c) it is enforced that all s390 specific ELF notes like e.g. NT_S390_PREFIX or NT_S390_CTRS have "LINUX" specified as note name. Otherwise the notes are ignored. For /proc/vmcore we currently use "CORE" for these notes. Up to now this has not been a real problem because the dump analysis tool "crash" does not check the note name. But it will break all programs that use libbfd for processing ELF notes. So fix this and use "LINUX" for all s390 specific notes to comply with libbfd. Reported-by: Philipp Rudo Reviewed-by: Philipp Rudo Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/crash_dump.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index f9293bfefb7f..408b4f4fda0f 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -329,7 +329,11 @@ static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len, static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len) { - return nt_init_name(buf, type, desc, d_len, KEXEC_CORE_NOTE_NAME); + const char *note_name = "LINUX"; + + if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG) + note_name = KEXEC_CORE_NOTE_NAME; + return nt_init_name(buf, type, desc, d_len, note_name); } /* From dc31841fcdce435ed88f87489f9ab09b48b43505 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Mon, 20 Feb 2017 14:52:58 +0100 Subject: [PATCH 163/357] s390/chsc: Add exception handler for CHSC instruction commit 77759137248f34864a8f7a58bbcebfcf1047504a upstream. Prevent kernel crashes due to unhandled exceptions raised by the CHSC instruction which may for example be triggered by invalid ioctl data. Fixes: 64150adf89df ("s390/cio: Introduce generic synchronous CHSC IOCTL") Signed-off-by: Peter Oberparleiter Reviewed-by: Sebastian Ott Reviewed-by: Cornelia Huck Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- drivers/s390/cio/ioasm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/s390/cio/ioasm.c b/drivers/s390/cio/ioasm.c index 8225da619014..4182f60124da 100644 --- a/drivers/s390/cio/ioasm.c +++ b/drivers/s390/cio/ioasm.c @@ -165,13 +165,15 @@ int tpi(struct tpi_info *addr) int chsc(void *chsc_area) { typedef struct { char _[4096]; } addr_type; - int cc; + int cc = -EIO; asm volatile( " .insn rre,0xb25f0000,%2,0\n" - " ipm %0\n" + "0: ipm %0\n" " srl %0,28\n" - : "=d" (cc), "=m" (*(addr_type *) chsc_area) + "1:\n" + EX_TABLE(0b, 1b) + : "+d" (cc), "=m" (*(addr_type *) chsc_area) : "d" (chsc_area), "m" (*(addr_type *) chsc_area) : "cc"); trace_s390_cio_chsc(chsc_area, cc); From 9d38fd6a4f6c097b32128995a260a55351dc7667 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 24 Feb 2017 07:43:51 +0100 Subject: [PATCH 164/357] s390: TASK_SIZE for kernel threads commit fb94a687d96c570d46332a4a890f1dcb7310e643 upstream. Return a sensible value if TASK_SIZE if called from a kernel thread. This gets us around an issue with copy_mount_options that does a magic size calculation "TASK_SIZE - (unsigned long)data" while in a kernel thread and data pointing to kernel space. Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/processor.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 602af692efdc..6bcbbece082b 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -89,7 +89,8 @@ extern void execve_tail(void); * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit. */ -#define TASK_SIZE_OF(tsk) ((tsk)->mm->context.asce_limit) +#define TASK_SIZE_OF(tsk) ((tsk)->mm ? \ + (tsk)->mm->context.asce_limit : TASK_MAX_SIZE) #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \ (1UL << 30) : (1UL << 41)) #define TASK_SIZE TASK_SIZE_OF(current) From 321081d522d332b44fd506ea366e1c82cfd94d4e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 4 Feb 2017 11:40:36 +0100 Subject: [PATCH 165/357] s390: make setup_randomness work commit da8fd820f389a0e29080b14c61bf5cf1d8ef5ca1 upstream. Commit bcfcbb6bae64 ("s390: add system information as device randomness") intended to add some virtual machine specific information to the randomness pool. Unfortunately it uses the page allocator before it is ready to use. In result the page allocator always returns NULL and the setup_randomness function never adds anything to the randomness pool. To fix this use memblock_alloc and memblock_free instead. Fixes: bcfcbb6bae64 ("s390: add system information as device randomness") Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/setup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index d027f2eb3559..d2bc8fa8b4e6 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -819,10 +819,10 @@ static void __init setup_randomness(void) { struct sysinfo_3_2_2 *vmms; - vmms = (struct sysinfo_3_2_2 *) alloc_page(GFP_KERNEL); - if (vmms && stsi(vmms, 3, 2, 2) == 0 && vmms->count) + vmms = (struct sysinfo_3_2_2 *) memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (stsi(vmms, 3, 2, 2) == 0 && vmms->count) add_device_randomness(&vmms, vmms->count); - free_page((unsigned long) vmms); + memblock_free((unsigned long) vmms, PAGE_SIZE); } /* From e067f68db256c2dc04aae8acb42ebfc138d87a7e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 5 Feb 2017 23:03:18 +0100 Subject: [PATCH 166/357] s390: use correct input data address for setup_randomness commit 4920e3cf77347d7d7373552d4839e8d832321313 upstream. The current implementation of setup_randomness uses the stack address and therefore the pointer to the SYSIB 3.2.2 block as input data address. Furthermore the length of the input data is the number of virtual-machine description blocks which is typically one. This means that typically a single zero byte is fed to add_device_randomness. Fix both of these and use the address of the first virtual machine description block as input data address and also use the correct length. Fixes: bcfcbb6bae64 ("s390: add system information as device randomness") Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index d2bc8fa8b4e6..e974e53ab597 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -821,7 +821,7 @@ static void __init setup_randomness(void) vmms = (struct sysinfo_3_2_2 *) memblock_alloc(PAGE_SIZE, PAGE_SIZE); if (stsi(vmms, 3, 2, 2) == 0 && vmms->count) - add_device_randomness(&vmms, vmms->count); + add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count); memblock_free((unsigned long) vmms, PAGE_SIZE); } From 60037aa689bac37816dca4eab2e8319eaa785dc6 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 21 Feb 2017 11:28:01 +0100 Subject: [PATCH 167/357] net: mvpp2: fix DMA address calculation in mvpp2_txq_inc_put() commit 239a3b663647869330955ec59caac0100ef9b60a upstream. When TX descriptors are filled in, the buffer DMA address is split between the tx_desc->buf_phys_addr field (high-order bits) and tx_desc->packet_offset field (5 low-order bits). However, when we re-calculate the DMA address from the TX descriptor in mvpp2_txq_inc_put(), we do not take tx_desc->packet_offset into account. This means that when the DMA address is not aligned on a 32 bytes boundary, we end up calling dma_unmap_single() with a DMA address that was not the one returned by dma_map_single(). This inconsistency is detected by the kernel when DMA_API_DEBUG is enabled. We fix this problem by properly calculating the DMA address in mvpp2_txq_inc_put(). Signed-off-by: Thomas Petazzoni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvpp2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 930c8165f2a8..0a4e81a253fb 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -991,7 +991,7 @@ static void mvpp2_txq_inc_put(struct mvpp2_txq_pcpu *txq_pcpu, txq_pcpu->buffs + txq_pcpu->txq_put_index; tx_buf->skb = skb; tx_buf->size = tx_desc->data_size; - tx_buf->phys = tx_desc->buf_phys_addr; + tx_buf->phys = tx_desc->buf_phys_addr + tx_desc->packet_offset; txq_pcpu->txq_put_index++; if (txq_pcpu->txq_put_index == txq_pcpu->size) txq_pcpu->txq_put_index = 0; From 411d0b0ced692dd2c0d7c10514ca8b923d8fa0f8 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Fri, 9 Dec 2016 17:18:50 +1100 Subject: [PATCH 168/357] cxl: Prevent read/write to AFU config space while AFU not configured commit 14a3ae34bfd0bcb1cc12d55b06a8584c11fac6fc upstream. During EEH recovery, we deconfigure all AFUs whilst leaving the corresponding vPHB and virtual PCI device in place. If something attempts to interact with the AFU's PCI config space (e.g. running lspci) after the AFU has been deconfigured and before it's reconfigured, cxl_pcie_{read,write}_config() will read invalid values from the deconfigured struct cxl_afu and proceed to Oops when they try to dereference pointers that have been set to NULL during deconfiguration. Add a rwsem to struct cxl_afu so we can prevent interaction with config space while the AFU is deconfigured. Reported-by: Pradipta Ghosh Suggested-by: Frederic Barrat Signed-off-by: Andrew Donnellan Signed-off-by: Vaibhav Jain Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cxl/cxl.h | 2 ++ drivers/misc/cxl/main.c | 3 ++- drivers/misc/cxl/pci.c | 2 ++ drivers/misc/cxl/vphb.c | 51 +++++++++++++++++++++++------------------ 4 files changed, 35 insertions(+), 23 deletions(-) diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index a144073593fa..379c463e0c4f 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -418,6 +418,8 @@ struct cxl_afu { struct dentry *debugfs; struct mutex contexts_lock; spinlock_t afu_cntl_lock; + /* Used to block access to AFU config space while deconfigured */ + struct rw_semaphore configured_rwsem; /* AFU error buffer fields and bin attribute for sysfs */ u64 eb_len, eb_offset; diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index 62e0dfb5f15b..2a6bf1d0a3a4 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -268,7 +268,8 @@ struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice) idr_init(&afu->contexts_idr); mutex_init(&afu->contexts_lock); spin_lock_init(&afu->afu_cntl_lock); - + init_rwsem(&afu->configured_rwsem); + down_write(&afu->configured_rwsem); afu->prefault_mode = CXL_PREFAULT_NONE; afu->irqs_max = afu->adapter->user_irqs; diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index e96be9ca4e60..b2ff10891775 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1129,6 +1129,7 @@ static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pc if ((rc = cxl_native_register_psl_irq(afu))) goto err2; + up_write(&afu->configured_rwsem); return 0; err2: @@ -1141,6 +1142,7 @@ err1: static void pci_deconfigure_afu(struct cxl_afu *afu) { + down_write(&afu->configured_rwsem); cxl_native_release_psl_irq(afu); if (afu->adapter->native->sl_ops->release_serr_irq) afu->adapter->native->sl_ops->release_serr_irq(afu); diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index 3519acebfdab..639a343b7836 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -76,23 +76,22 @@ static int cxl_pcie_cfg_record(u8 bus, u8 devfn) return (bus << 8) + devfn; } -static int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn, - struct cxl_afu **_afu, int *_record) +static inline struct cxl_afu *pci_bus_to_afu(struct pci_bus *bus) +{ + struct pci_controller *phb = bus ? pci_bus_to_host(bus) : NULL; + + return phb ? phb->private_data : NULL; +} + +static inline int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn, + struct cxl_afu *afu, int *_record) { - struct pci_controller *phb; - struct cxl_afu *afu; int record; - phb = pci_bus_to_host(bus); - if (phb == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - - afu = (struct cxl_afu *)phb->private_data; record = cxl_pcie_cfg_record(bus->number, devfn); if (record > afu->crs_num) return PCIBIOS_DEVICE_NOT_FOUND; - *_afu = afu; *_record = record; return 0; } @@ -106,9 +105,14 @@ static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn, u16 val16; u32 val32; - rc = cxl_pcie_config_info(bus, devfn, &afu, &record); + afu = pci_bus_to_afu(bus); + /* Grab a reader lock on afu. */ + if (afu == NULL || !down_read_trylock(&afu->configured_rwsem)) + return PCIBIOS_DEVICE_NOT_FOUND; + + rc = cxl_pcie_config_info(bus, devfn, afu, &record); if (rc) - return rc; + goto out; switch (len) { case 1: @@ -127,10 +131,9 @@ static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn, WARN_ON(1); } - if (rc) - return PCIBIOS_DEVICE_NOT_FOUND; - - return PCIBIOS_SUCCESSFUL; +out: + up_read(&afu->configured_rwsem); + return rc ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL; } static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn, @@ -139,9 +142,14 @@ static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn, int rc, record; struct cxl_afu *afu; - rc = cxl_pcie_config_info(bus, devfn, &afu, &record); + afu = pci_bus_to_afu(bus); + /* Grab a reader lock on afu. */ + if (afu == NULL || !down_read_trylock(&afu->configured_rwsem)) + return PCIBIOS_DEVICE_NOT_FOUND; + + rc = cxl_pcie_config_info(bus, devfn, afu, &record); if (rc) - return rc; + goto out; switch (len) { case 1: @@ -157,10 +165,9 @@ static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn, WARN_ON(1); } - if (rc) - return PCIBIOS_SET_FAILED; - - return PCIBIOS_SUCCESSFUL; +out: + up_read(&afu->configured_rwsem); + return rc ? PCIBIOS_SET_FAILED : PCIBIOS_SUCCESSFUL; } static struct pci_ops cxl_pcie_pci_ops = From 53d43706f2ba5cd805093d21d69fd700584a71ab Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Mon, 6 Feb 2017 12:07:17 +1100 Subject: [PATCH 169/357] cxl: fix nested locking hang during EEH hotplug commit 171ed0fcd8966d82c45376f1434678e7b9d4d9b1 upstream. Commit 14a3ae34bfd0 ("cxl: Prevent read/write to AFU config space while AFU not configured") introduced a rwsem to fix an invalid memory access that occurred when someone attempts to access the config space of an AFU on a vPHB whilst the AFU is deconfigured, such as during EEH recovery. It turns out that it's possible to run into a nested locking issue when EEH recovery fails and a full device hotplug is required. cxl_pci_error_detected() deconfigures the AFU, taking a writer lock on configured_rwsem. When EEH recovery fails, the EEH code calls pci_hp_remove_devices() to remove the device, which in turn calls cxl_remove() -> cxl_pci_remove_afu() -> pci_deconfigure_afu(), which tries to grab the writer lock that's already held. Standard rwsem semantics don't express what we really want to do here and don't allow for nested locking. Fix this by replacing the rwsem with an atomic_t which we can control more finely. Allow the AFU to be locked multiple times so long as there are no readers. Fixes: 14a3ae34bfd0 ("cxl: Prevent read/write to AFU config space while AFU not configured") Signed-off-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cxl/cxl.h | 5 +++-- drivers/misc/cxl/main.c | 3 +-- drivers/misc/cxl/pci.c | 11 +++++++++-- drivers/misc/cxl/vphb.c | 18 ++++++++++++++---- 4 files changed, 27 insertions(+), 10 deletions(-) diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 379c463e0c4f..52ee3da85366 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -418,8 +418,9 @@ struct cxl_afu { struct dentry *debugfs; struct mutex contexts_lock; spinlock_t afu_cntl_lock; - /* Used to block access to AFU config space while deconfigured */ - struct rw_semaphore configured_rwsem; + + /* -1: AFU deconfigured/locked, >= 0: number of readers */ + atomic_t configured_state; /* AFU error buffer fields and bin attribute for sysfs */ u64 eb_len, eb_offset; diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index 2a6bf1d0a3a4..cc1706a92ace 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -268,8 +268,7 @@ struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice) idr_init(&afu->contexts_idr); mutex_init(&afu->contexts_lock); spin_lock_init(&afu->afu_cntl_lock); - init_rwsem(&afu->configured_rwsem); - down_write(&afu->configured_rwsem); + atomic_set(&afu->configured_state, -1); afu->prefault_mode = CXL_PREFAULT_NONE; afu->irqs_max = afu->adapter->user_irqs; diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index b2ff10891775..dd99b06e121a 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1129,7 +1129,7 @@ static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pc if ((rc = cxl_native_register_psl_irq(afu))) goto err2; - up_write(&afu->configured_rwsem); + atomic_set(&afu->configured_state, 0); return 0; err2: @@ -1142,7 +1142,14 @@ err1: static void pci_deconfigure_afu(struct cxl_afu *afu) { - down_write(&afu->configured_rwsem); + /* + * It's okay to deconfigure when AFU is already locked, otherwise wait + * until there are no readers + */ + if (atomic_read(&afu->configured_state) != -1) { + while (atomic_cmpxchg(&afu->configured_state, 0, -1) != -1) + schedule(); + } cxl_native_release_psl_irq(afu); if (afu->adapter->native->sl_ops->release_serr_irq) afu->adapter->native->sl_ops->release_serr_irq(afu); diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index 639a343b7836..512a4897dbf6 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -83,6 +83,16 @@ static inline struct cxl_afu *pci_bus_to_afu(struct pci_bus *bus) return phb ? phb->private_data : NULL; } +static void cxl_afu_configured_put(struct cxl_afu *afu) +{ + atomic_dec_if_positive(&afu->configured_state); +} + +static bool cxl_afu_configured_get(struct cxl_afu *afu) +{ + return atomic_inc_unless_negative(&afu->configured_state); +} + static inline int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn, struct cxl_afu *afu, int *_record) { @@ -107,7 +117,7 @@ static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn, afu = pci_bus_to_afu(bus); /* Grab a reader lock on afu. */ - if (afu == NULL || !down_read_trylock(&afu->configured_rwsem)) + if (afu == NULL || !cxl_afu_configured_get(afu)) return PCIBIOS_DEVICE_NOT_FOUND; rc = cxl_pcie_config_info(bus, devfn, afu, &record); @@ -132,7 +142,7 @@ static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn, } out: - up_read(&afu->configured_rwsem); + cxl_afu_configured_put(afu); return rc ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL; } @@ -144,7 +154,7 @@ static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn, afu = pci_bus_to_afu(bus); /* Grab a reader lock on afu. */ - if (afu == NULL || !down_read_trylock(&afu->configured_rwsem)) + if (afu == NULL || !cxl_afu_configured_get(afu)) return PCIBIOS_DEVICE_NOT_FOUND; rc = cxl_pcie_config_info(bus, devfn, afu, &record); @@ -166,7 +176,7 @@ static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn, } out: - up_read(&afu->configured_rwsem); + cxl_afu_configured_put(afu); return rc ? PCIBIOS_SET_FAILED : PCIBIOS_SUCCESSFUL; } From c9b3f3173fa5178df2c8fe2279442f48faaf7a98 Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Tue, 17 Jan 2017 15:24:05 -0800 Subject: [PATCH 170/357] brcmfmac: fix incorrect event channel deduction commit 8e290cecdd0178f3d4cf7d463c51dc7e462843b4 upstream. brcmf_sdio_fromevntchan() was being called on the the data frame rather than the software header, causing some frames to be mischaracterized as on the event channel rather than the data channel. This fixes a major performance regression (due to dropped packets). With this patch the download speed jumped from 1Mbit/s back up to 40MBit/s due to the sheer amount of packets being incorrectly processed. Fixes: c56caa9db8ab ("brcmfmac: screening firmware event packet") Signed-off-by: Gavin Li Acked-by: Arend van Spriel [kvalo@codeaurora.org: improve commit logs based on email discussion] Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index b892dac70f4b..2458e6e05276 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -1660,7 +1660,7 @@ static u8 brcmf_sdio_rxglom(struct brcmf_sdio *bus, u8 rxseq) pfirst->len, pfirst->next, pfirst->prev); skb_unlink(pfirst, &bus->glom); - if (brcmf_sdio_fromevntchan(pfirst->data)) + if (brcmf_sdio_fromevntchan(&dptr[SDPCM_HWHDR_LEN])) brcmf_rx_event(bus->sdiodev->dev, pfirst); else brcmf_rx_frame(bus->sdiodev->dev, pfirst, From 808e83e5add13152bd4a88346487de68395c136b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Jan 2017 18:28:35 +1300 Subject: [PATCH 171/357] mnt: Tuck mounts under others instead of creating shadow/side mounts. commit 1064f874abc0d05eeed8993815f584d847b72486 upstream. Ever since mount propagation was introduced in cases where a mount in propagated to parent mount mountpoint pair that is already in use the code has placed the new mount behind the old mount in the mount hash table. This implementation detail is problematic as it allows creating arbitrary length mount hash chains. Furthermore it invalidates the constraint maintained elsewhere in the mount code that a parent mount and a mountpoint pair will have exactly one mount upon them. Making it hard to deal with and to talk about this special case in the mount code. Modify mount propagation to notice when there is already a mount at the parent mount and mountpoint where a new mount is propagating to and place that preexisting mount on top of the new mount. Modify unmount propagation to notice when a mount that is being unmounted has another mount on top of it (and no other children), and to replace the unmounted mount with the mount on top of it. Move the MNT_UMUONT test from __lookup_mnt_last into __propagate_umount as that is the only call of __lookup_mnt_last where MNT_UMOUNT may be set on any mount visible in the mount hash table. These modifications allow: - __lookup_mnt_last to be removed. - attach_shadows to be renamed __attach_mnt and its shadow handling to be removed. - commit_tree to be simplified - copy_tree to be simplified The result is an easier to understand tree of mounts that does not allow creation of arbitrary length hash chains in the mount hash table. The result is also a very slight userspace visible difference in semantics. The following two cases now behave identically, where before order mattered: case 1: (explicit user action) B is a slave of A mount something on A/a , it will propagate to B/a and than mount something on B/a case 2: (tucked mount) B is a slave of A mount something on B/a and than mount something on A/a Histroically umount A/a would fail in case 1 and succeed in case 2. Now umount A/a succeeds in both configurations. This very small change in semantics appears if anything to be a bug fix to me and my survey of userspace leads me to believe that no programs will notice or care of this subtle semantic change. v2: Updated to mnt_change_mountpoint to not call dput or mntput and instead to decrement the counts directly. It is guaranteed that there will be other references when mnt_change_mountpoint is called so this is safe. v3: Moved put_mountpoint under mount_lock in attach_recursive_mnt As the locking in fs/namespace.c changed between v2 and v3. v4: Reworked the logic in propagate_mount_busy and __propagate_umount that detects when a mount completely covers another mount. v5: Removed unnecessary tests whose result is alwasy true in find_topper and attach_recursive_mnt. v6: Document the user space visible semantic difference. Fixes: b90fa9ae8f51 ("[PATCH] shared mount handling: bind and rbind") Tested-by: Andrei Vagin Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/mount.h | 1 - fs/namespace.c | 110 +++++++++++++++++++++++++++---------------------- fs/pnode.c | 61 +++++++++++++++++++++------ fs/pnode.h | 2 + 4 files changed, 111 insertions(+), 63 deletions(-) diff --git a/fs/mount.h b/fs/mount.h index d2e25d7b64b3..d8295f273a2f 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -89,7 +89,6 @@ static inline int is_mounted(struct vfsmount *mnt) } extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *); -extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *); extern int __legitimize_mnt(struct vfsmount *, unsigned); extern bool legitimize_mnt(struct vfsmount *, unsigned); diff --git a/fs/namespace.c b/fs/namespace.c index 7cea503ae06d..ea751263fefa 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -640,28 +640,6 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) return NULL; } -/* - * find the last mount at @dentry on vfsmount @mnt. - * mount_lock must be held. - */ -struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) -{ - struct mount *p, *res = NULL; - p = __lookup_mnt(mnt, dentry); - if (!p) - goto out; - if (!(p->mnt.mnt_flags & MNT_UMOUNT)) - res = p; - hlist_for_each_entry_continue(p, mnt_hash) { - if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry) - break; - if (!(p->mnt.mnt_flags & MNT_UMOUNT)) - res = p; - } -out: - return res; -} - /* * lookup_mnt - Return the first child mount mounted at path * @@ -882,6 +860,13 @@ void mnt_set_mountpoint(struct mount *mnt, hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list); } +static void __attach_mnt(struct mount *mnt, struct mount *parent) +{ + hlist_add_head_rcu(&mnt->mnt_hash, + m_hash(&parent->mnt, mnt->mnt_mountpoint)); + list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); +} + /* * vfsmount lock must be held for write */ @@ -890,28 +875,45 @@ static void attach_mnt(struct mount *mnt, struct mountpoint *mp) { mnt_set_mountpoint(parent, mp, mnt); - hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry)); - list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); + __attach_mnt(mnt, parent); } -static void attach_shadowed(struct mount *mnt, - struct mount *parent, - struct mount *shadows) +void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt) { - if (shadows) { - hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash); - list_add(&mnt->mnt_child, &shadows->mnt_child); - } else { - hlist_add_head_rcu(&mnt->mnt_hash, - m_hash(&parent->mnt, mnt->mnt_mountpoint)); - list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); - } + struct mountpoint *old_mp = mnt->mnt_mp; + struct dentry *old_mountpoint = mnt->mnt_mountpoint; + struct mount *old_parent = mnt->mnt_parent; + + list_del_init(&mnt->mnt_child); + hlist_del_init(&mnt->mnt_mp_list); + hlist_del_init_rcu(&mnt->mnt_hash); + + attach_mnt(mnt, parent, mp); + + put_mountpoint(old_mp); + + /* + * Safely avoid even the suggestion this code might sleep or + * lock the mount hash by taking advantage of the knowledge that + * mnt_change_mountpoint will not release the final reference + * to a mountpoint. + * + * During mounting, the mount passed in as the parent mount will + * continue to use the old mountpoint and during unmounting, the + * old mountpoint will continue to exist until namespace_unlock, + * which happens well after mnt_change_mountpoint. + */ + spin_lock(&old_mountpoint->d_lock); + old_mountpoint->d_lockref.count--; + spin_unlock(&old_mountpoint->d_lock); + + mnt_add_count(old_parent, -1); } /* * vfsmount lock must be held for write */ -static void commit_tree(struct mount *mnt, struct mount *shadows) +static void commit_tree(struct mount *mnt) { struct mount *parent = mnt->mnt_parent; struct mount *m; @@ -929,7 +931,7 @@ static void commit_tree(struct mount *mnt, struct mount *shadows) n->mounts += n->pending_mounts; n->pending_mounts = 0; - attach_shadowed(mnt, parent, shadows); + __attach_mnt(mnt, parent); touch_mnt_namespace(n); } @@ -1737,7 +1739,6 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, continue; for (s = r; s; s = next_mnt(s, r)) { - struct mount *t = NULL; if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(s)) { s = skip_mnt_tree(s); @@ -1759,14 +1760,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, goto out; lock_mount_hash(); list_add_tail(&q->mnt_list, &res->mnt_list); - mnt_set_mountpoint(parent, p->mnt_mp, q); - if (!list_empty(&parent->mnt_mounts)) { - t = list_last_entry(&parent->mnt_mounts, - struct mount, mnt_child); - if (t->mnt_mp != p->mnt_mp) - t = NULL; - } - attach_shadowed(q, parent, t); + attach_mnt(q, parent, p->mnt_mp); unlock_mount_hash(); } } @@ -1967,10 +1961,18 @@ static int attach_recursive_mnt(struct mount *source_mnt, { HLIST_HEAD(tree_list); struct mnt_namespace *ns = dest_mnt->mnt_ns; + struct mountpoint *smp; struct mount *child, *p; struct hlist_node *n; int err; + /* Preallocate a mountpoint in case the new mounts need + * to be tucked under other mounts. + */ + smp = get_mountpoint(source_mnt->mnt.mnt_root); + if (IS_ERR(smp)) + return PTR_ERR(smp); + /* Is there space to add these mounts to the mount namespace? */ if (!parent_path) { err = count_mounts(ns, source_mnt); @@ -1997,16 +1999,19 @@ static int attach_recursive_mnt(struct mount *source_mnt, touch_mnt_namespace(source_mnt->mnt_ns); } else { mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt); - commit_tree(source_mnt, NULL); + commit_tree(source_mnt); } hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) { struct mount *q; hlist_del_init(&child->mnt_hash); - q = __lookup_mnt_last(&child->mnt_parent->mnt, - child->mnt_mountpoint); - commit_tree(child, q); + q = __lookup_mnt(&child->mnt_parent->mnt, + child->mnt_mountpoint); + if (q) + mnt_change_mountpoint(child, smp, q); + commit_tree(child); } + put_mountpoint(smp); unlock_mount_hash(); return 0; @@ -2021,6 +2026,11 @@ static int attach_recursive_mnt(struct mount *source_mnt, cleanup_group_ids(source_mnt, NULL); out: ns->pending_mounts = 0; + + read_seqlock_excl(&mount_lock); + put_mountpoint(smp); + read_sequnlock_excl(&mount_lock); + return err; } diff --git a/fs/pnode.c b/fs/pnode.c index 234a9ac49958..b394ca5307ec 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -324,6 +324,21 @@ out: return ret; } +static struct mount *find_topper(struct mount *mnt) +{ + /* If there is exactly one mount covering mnt completely return it. */ + struct mount *child; + + if (!list_is_singular(&mnt->mnt_mounts)) + return NULL; + + child = list_first_entry(&mnt->mnt_mounts, struct mount, mnt_child); + if (child->mnt_mountpoint != mnt->mnt.mnt_root) + return NULL; + + return child; +} + /* * return true if the refcount is greater than count */ @@ -344,9 +359,8 @@ static inline int do_refcount_check(struct mount *mnt, int count) */ int propagate_mount_busy(struct mount *mnt, int refcnt) { - struct mount *m, *child; + struct mount *m, *child, *topper; struct mount *parent = mnt->mnt_parent; - int ret = 0; if (mnt == parent) return do_refcount_check(mnt, refcnt); @@ -361,12 +375,24 @@ int propagate_mount_busy(struct mount *mnt, int refcnt) for (m = propagation_next(parent, parent); m; m = propagation_next(m, parent)) { - child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint); - if (child && list_empty(&child->mnt_mounts) && - (ret = do_refcount_check(child, 1))) - break; + int count = 1; + child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint); + if (!child) + continue; + + /* Is there exactly one mount on the child that covers + * it completely whose reference should be ignored? + */ + topper = find_topper(child); + if (topper) + count += 1; + else if (!list_empty(&child->mnt_mounts)) + continue; + + if (do_refcount_check(child, count)) + return 1; } - return ret; + return 0; } /* @@ -383,7 +409,7 @@ void propagate_mount_unlock(struct mount *mnt) for (m = propagation_next(parent, parent); m; m = propagation_next(m, parent)) { - child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint); + child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint); if (child) child->mnt.mnt_flags &= ~MNT_LOCKED; } @@ -401,9 +427,11 @@ static void mark_umount_candidates(struct mount *mnt) for (m = propagation_next(parent, parent); m; m = propagation_next(m, parent)) { - struct mount *child = __lookup_mnt_last(&m->mnt, + struct mount *child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint); - if (child && (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m))) { + if (!child || (child->mnt.mnt_flags & MNT_UMOUNT)) + continue; + if (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m)) { SET_MNT_MARK(child); } } @@ -422,8 +450,8 @@ static void __propagate_umount(struct mount *mnt) for (m = propagation_next(parent, parent); m; m = propagation_next(m, parent)) { - - struct mount *child = __lookup_mnt_last(&m->mnt, + struct mount *topper; + struct mount *child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint); /* * umount the child only if the child has no children @@ -432,6 +460,15 @@ static void __propagate_umount(struct mount *mnt) if (!child || !IS_MNT_MARKED(child)) continue; CLEAR_MNT_MARK(child); + + /* If there is exactly one mount covering all of child + * replace child with that mount. + */ + topper = find_topper(child); + if (topper) + mnt_change_mountpoint(child->mnt_parent, child->mnt_mp, + topper); + if (list_empty(&child->mnt_mounts)) { list_del_init(&child->mnt_child); child->mnt.mnt_flags |= MNT_UMOUNT; diff --git a/fs/pnode.h b/fs/pnode.h index 550f5a8b4fcf..dc87e65becd2 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -49,6 +49,8 @@ int get_dominating_id(struct mount *mnt, const struct path *root); unsigned int mnt_get_count(struct mount *mnt); void mnt_set_mountpoint(struct mount *, struct mountpoint *, struct mount *); +void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, + struct mount *mnt); struct mount *copy_tree(struct mount *, struct dentry *, int); bool is_path_reachable(struct mount *, struct dentry *, const struct path *root); From 1626076b8e1cde49becc0e68d2779174e6a6f599 Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Wed, 28 Dec 2016 14:47:23 +0200 Subject: [PATCH 172/357] IB/ipoib: Fix deadlock between rmmod and set_mode commit 0a0007f28304cb9fc87809c86abb80ec71317f20 upstream. When calling set_mode from sys/fs, the call flow locks the sys/fs lock first and then tries to lock rtnl_lock (when calling ipoib_set_mod). On the other hand, the rmmod call flow takes the rtnl_lock first (when calling unregister_netdev) and then tries to take the sys/fs lock. Deadlock a->b, b->a. The problem starts when ipoib_set_mod frees it's rtnl_lck and tries to get it after that. set_mod: [] ? check_preempt_curr+0x6d/0x90 [] __mutex_lock_slowpath+0x13e/0x180 [] ? __rtnl_unlock+0x15/0x20 [] mutex_lock+0x2b/0x50 [] rtnl_lock+0x15/0x20 [] ipoib_set_mode+0x97/0x160 [ib_ipoib] [] set_mode+0x3b/0x80 [ib_ipoib] [] dev_attr_store+0x20/0x30 [] sysfs_write_file+0xe5/0x170 [] vfs_write+0xb8/0x1a0 [] sys_write+0x51/0x90 [] system_call_fastpath+0x16/0x1b rmmod: [] ? put_dec+0x10c/0x110 [] ? number+0x2ee/0x320 [] schedule_timeout+0x215/0x2e0 [] ? vsnprintf+0x484/0x5f0 [] ? string+0x40/0x100 [] wait_for_common+0x123/0x180 [] ? default_wake_function+0x0/0x20 [] ? ifind_fast+0x5e/0xb0 [] wait_for_completion+0x1d/0x20 [] sysfs_addrm_finish+0x228/0x270 [] sysfs_remove_dir+0xa3/0xf0 [] kobject_del+0x16/0x40 [] device_del+0x184/0x1e0 [] netdev_unregister_kobject+0xab/0xc0 [] rollback_registered+0xae/0x130 [] unregister_netdevice+0x22/0x70 [] unregister_netdev+0x1e/0x30 [] ipoib_remove_one+0xe0/0x120 [ib_ipoib] [] ib_unregister_device+0x4f/0x100 [ib_core] [] mlx4_ib_remove+0x41/0x180 [mlx4_ib] [] mlx4_remove_device+0x71/0x90 [mlx4_core] Fixes: 862096a8bbf8 ("IB/ipoib: Add more rtnl_link_ops callbacks") Cc: Or Gerlitz Signed-off-by: Feras Daoud Signed-off-by: Erez Shitrit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 12 +++++++----- drivers/infiniband/ulp/ipoib/ipoib_main.c | 6 ++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 81a8080c18b3..0616a65f0d78 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1511,12 +1511,14 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, ret = ipoib_set_mode(dev, buf); - rtnl_unlock(); + /* The assumption is that the function ipoib_set_mode returned + * with the rtnl held by it, if not the value -EBUSY returned, + * then no need to rtnl_unlock + */ + if (ret != -EBUSY) + rtnl_unlock(); - if (!ret) - return count; - - return ret; + return (!ret || ret == -EBUSY) ? count : ret; } static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index b58d9dca5c93..a5d9678f3b33 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -468,8 +468,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM; ipoib_flush_paths(dev); - rtnl_lock(); - return 0; + return (!rtnl_trylock()) ? -EBUSY : 0; } if (!strcmp(buf, "datagram\n")) { @@ -478,8 +477,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu)); rtnl_unlock(); ipoib_flush_paths(dev); - rtnl_lock(); - return 0; + return (!rtnl_trylock()) ? -EBUSY : 0; } return -EINVAL; From 2e539fa49efda450229e3a13db5202b4d9ae2997 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Wed, 1 Feb 2017 19:10:05 +0200 Subject: [PATCH 173/357] IB/IPoIB: Add destination address when re-queue packet commit 2b0841766a898aba84630fb723989a77a9d3b4e6 upstream. When sending packet to destination that was not resolved yet via path query, the driver keeps the skb and tries to re-send it again when the path is resolved. But when re-sending via dev_queue_xmit the kernel doesn't call to dev_hard_header, so IPoIB needs to keep 20 bytes in the skb and to put the destination address inside them. In that way the dev_start_xmit will have the correct destination, and the driver won't take the destination from the skb->data, while nothing exists there, which causes to packet be be dropped. The test flow is: 1. Run the SM on remote node, 2. Restart the driver. 4. Ping some destination, 3. Observe that first ICMP request will be dropped. Fixes: fc791b633515 ("IB/ipoib: move back IB LL address into the hard header") Signed-off-by: Erez Shitrit Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Tested-by: Yuval Shaia Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 30 +++++++++++++---------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index a5d9678f3b33..3ef7b8f049c4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -701,6 +701,14 @@ int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv) return ret; } +static void push_pseudo_header(struct sk_buff *skb, const char *daddr) +{ + struct ipoib_pseudo_header *phdr; + + phdr = (struct ipoib_pseudo_header *)skb_push(skb, sizeof(*phdr)); + memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN); +} + void ipoib_flush_paths(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -925,8 +933,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr, } if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { - /* put pseudoheader back on for next time */ - skb_push(skb, IPOIB_PSEUDO_LEN); + push_pseudo_header(skb, neigh->daddr); __skb_queue_tail(&neigh->queue, skb); } else { ipoib_warn(priv, "queue length limit %d. Packet drop.\n", @@ -944,10 +951,12 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr, if (!path->query && path_rec_start(dev, path)) goto err_path; - if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) + if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { + push_pseudo_header(skb, neigh->daddr); __skb_queue_tail(&neigh->queue, skb); - else + } else { goto err_drop; + } } spin_unlock_irqrestore(&priv->lock, flags); @@ -983,8 +992,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, } if (path) { if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) { - /* put pseudoheader back on for next time */ - skb_push(skb, IPOIB_PSEUDO_LEN); + push_pseudo_header(skb, phdr->hwaddr); __skb_queue_tail(&path->queue, skb); } else { ++dev->stats.tx_dropped; @@ -1016,8 +1024,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, return; } else if ((path->query || !path_rec_start(dev, path)) && skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) { - /* put pseudoheader back on for next time */ - skb_push(skb, IPOIB_PSEUDO_LEN); + push_pseudo_header(skb, phdr->hwaddr); __skb_queue_tail(&path->queue, skb); } else { ++dev->stats.tx_dropped; @@ -1098,8 +1105,7 @@ send_using_neigh: } if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { - /* put pseudoheader back on for next time */ - skb_push(skb, sizeof(*phdr)); + push_pseudo_header(skb, phdr->hwaddr); spin_lock_irqsave(&priv->lock, flags); __skb_queue_tail(&neigh->queue, skb); spin_unlock_irqrestore(&priv->lock, flags); @@ -1131,7 +1137,6 @@ static int ipoib_hard_header(struct sk_buff *skb, unsigned short type, const void *daddr, const void *saddr, unsigned len) { - struct ipoib_pseudo_header *phdr; struct ipoib_header *header; header = (struct ipoib_header *) skb_push(skb, sizeof *header); @@ -1144,8 +1149,7 @@ static int ipoib_hard_header(struct sk_buff *skb, * destination address into skb hard header so we can figure out where * to send the packet later. */ - phdr = (struct ipoib_pseudo_header *) skb_push(skb, sizeof(*phdr)); - memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN); + push_pseudo_header(skb, daddr); return IPOIB_HARD_LEN; } From 04f16db056d035785f217f1b958fe49ca3cd9be5 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 18 Jan 2017 14:10:30 +0200 Subject: [PATCH 174/357] IB/mlx5: Fix out-of-bound access commit 0fd27a88c2e4f548937fd7d93fc6e65c4ad7c278 upstream. When we initialize buffer to create SRQ in kernel, the number of pages was less than actually used in following mlx5_fill_page_array(). Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Leon Romanovsky Reviewed-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/srq.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 729b0696626e..d61fd2c727c0 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -165,8 +165,6 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, int err; int i; struct mlx5_wqe_srq_next_seg *next; - int page_shift; - int npages; err = mlx5_db_alloc(dev->mdev, &srq->db); if (err) { @@ -179,7 +177,6 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, err = -ENOMEM; goto err_db; } - page_shift = srq->buf.page_shift; srq->head = 0; srq->tail = srq->msrq.max - 1; @@ -191,10 +188,8 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, cpu_to_be16((i + 1) & (srq->msrq.max - 1)); } - npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT)); - mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n", - buf_size, page_shift, srq->buf.npages, npages); - in->pas = mlx5_vzalloc(sizeof(*in->pas) * npages); + mlx5_ib_dbg(dev, "srq->buf.page_shift = %d\n", srq->buf.page_shift); + in->pas = mlx5_vzalloc(sizeof(*in->pas) * srq->buf.npages); if (!in->pas) { err = -ENOMEM; goto err_buf; @@ -210,7 +205,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, } srq->wq_sig = !!srq_signature; - in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT; if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && in->type == IB_SRQT_XRC) in->user_index = MLX5_IB_DEFAULT_UIDX; From 516a12ab11bdd66104b1775a097a9a28c311cecb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 14 Feb 2017 10:56:29 -0800 Subject: [PATCH 175/357] IB/SRP: Avoid using IB_MR_TYPE_SG_GAPS commit d6c58dc40fec35ff6cdb350b53bce0fcf9143709 upstream. Tests have shown that the following error message is reported when using SG-GAPS registration with an mlx5 adapter: scsi host1: ib_srp: failed RECV status WR flushed (5) for CQE ffff880bd4270eb0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 0f007806 2500002a ad9fafd1 scsi host1: ib_srp: reconnect succeeded mlx5_0:dump_cqe:262:(pid 7369): dump error cqe 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 0f007806 25000032 00105dd0 scsi host1: ib_srp: failed FAST REG status memory management operation error (6) for CQE ffff880b92860138 Hence avoid using SG-GAPS memory registrations. Additionally, always configure the blk_queue_virt_boundary() to avoid to trigger a mapping failure when using adapters that support SG-GAPS (e.g. mlx5). Fixes: commit ad8e66b4a801 ("IB/srp: fix mr allocation when the device supports sg gaps") Fixes: commit 509c5f33f4f6 ("IB/srp: Prevent mapping failures") Reported-by: Laurence Oberman Signed-off-by: Bart Van Assche Cc: Israel Rukshin Cc: Max Gurtovoy Cc: Leon Romanovsky Cc: Mark Bloch Cc: Yuval Shaia Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srp/ib_srp.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index e7dcf14a76e2..97247582eaa6 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -366,7 +366,6 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, struct srp_fr_desc *d; struct ib_mr *mr; int i, ret = -EINVAL; - enum ib_mr_type mr_type; if (pool_size <= 0) goto err; @@ -380,13 +379,9 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, spin_lock_init(&pool->lock); INIT_LIST_HEAD(&pool->free_list); - if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG) - mr_type = IB_MR_TYPE_SG_GAPS; - else - mr_type = IB_MR_TYPE_MEM_REG; - for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { - mr = ib_alloc_mr(pd, mr_type, max_page_list_len); + mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, + max_page_list_len); if (IS_ERR(mr)) { ret = PTR_ERR(mr); goto destroy_pool; @@ -2652,9 +2647,8 @@ static int srp_slave_alloc(struct scsi_device *sdev) struct Scsi_Host *shost = sdev->host; struct srp_target_port *target = host_to_target(shost); struct srp_device *srp_dev = target->srp_host->srp_dev; - struct ib_device *ibdev = srp_dev->dev; - if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) + if (true) blk_queue_virt_boundary(sdev->request_queue, ~srp_dev->mr_page_mask); From d5d1d2cc4be76abdabb58e1bdad84fc06090042c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 14 Feb 2017 10:56:30 -0800 Subject: [PATCH 176/357] IB/srp: Avoid that duplicate responses trigger a kernel bug commit 6cb72bc1b40bb2c1750ee7a5ebade93bed49a5fb upstream. After srp_process_rsp() returns there is a short time during which the scsi_host_find_tag() call will return a pointer to the SCSI command that is being completed. If during that time a duplicate response is received, avoid that the following call stack appears: BUG: unable to handle kernel NULL pointer dereference at (null) IP: srp_recv_done+0x450/0x6b0 [ib_srp] Oops: 0000 [#1] SMP CPU: 10 PID: 0 Comm: swapper/10 Not tainted 4.10.0-rc7-dbg+ #1 Call Trace: __ib_process_cq+0x4b/0xd0 [ib_core] ib_poll_handler+0x1d/0x70 [ib_core] irq_poll_softirq+0xba/0x120 __do_softirq+0xba/0x4c0 irq_exit+0xbe/0xd0 smp_apic_timer_interrupt+0x38/0x50 apic_timer_interrupt+0x90/0xa0 RIP: srp_recv_done+0x450/0x6b0 [ib_srp] RSP: ffff88046f483e20 Signed-off-by: Bart Van Assche Cc: Israel Rukshin Cc: Max Gurtovoy Cc: Laurence Oberman Cc: Steve Feeley Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srp/ib_srp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 97247582eaa6..a8239656e571 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1880,9 +1880,11 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp) complete(&ch->tsk_mgmt_done); } else { scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag); - if (scmnd) { + if (scmnd && scmnd->host_scribble) { req = (void *)scmnd->host_scribble; scmnd = srp_claim_req(ch, req, NULL, scmnd); + } else { + scmnd = NULL; } if (!scmnd) { shost_printk(KERN_ERR, target->scsi_host, From 48e2181b0b8d1a1e226b2932a11d6f94aef28fb8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 14 Feb 2017 10:56:31 -0800 Subject: [PATCH 177/357] IB/srp: Fix race conditions related to task management commit 0a6fdbdeb1c25e31763c1fb333fa2723a7d2aba6 upstream. Avoid that srp_process_rsp() overwrites the status information in ch if the SRP target response timed out and processing of another task management function has already started. Avoid that issuing multiple task management functions concurrently triggers list corruption. This patch prevents that the following stack trace appears in the system log: WARNING: CPU: 8 PID: 9269 at lib/list_debug.c:52 __list_del_entry_valid+0xbc/0xc0 list_del corruption. prev->next should be ffffc90004bb7b00, but was ffff8804052ecc68 CPU: 8 PID: 9269 Comm: sg_reset Tainted: G W 4.10.0-rc7-dbg+ #3 Call Trace: dump_stack+0x68/0x93 __warn+0xc6/0xe0 warn_slowpath_fmt+0x4a/0x50 __list_del_entry_valid+0xbc/0xc0 wait_for_completion_timeout+0x12e/0x170 srp_send_tsk_mgmt+0x1ef/0x2d0 [ib_srp] srp_reset_device+0x5b/0x110 [ib_srp] scsi_ioctl_reset+0x1c7/0x290 scsi_ioctl+0x12a/0x420 sd_ioctl+0x9d/0x100 blkdev_ioctl+0x51e/0x9f0 block_ioctl+0x38/0x40 do_vfs_ioctl+0x8f/0x700 SyS_ioctl+0x3c/0x70 entry_SYSCALL_64_fastpath+0x18/0xad Signed-off-by: Bart Van Assche Cc: Israel Rukshin Cc: Max Gurtovoy Cc: Laurence Oberman Cc: Steve Feeley Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srp/ib_srp.c | 45 +++++++++++++++++++---------- drivers/infiniband/ulp/srp/ib_srp.h | 1 + 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index a8239656e571..1eee8f7e75ca 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1872,12 +1872,17 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp) if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) { spin_lock_irqsave(&ch->lock, flags); ch->req_lim += be32_to_cpu(rsp->req_lim_delta); + if (rsp->tag == ch->tsk_mgmt_tag) { + ch->tsk_mgmt_status = -1; + if (be32_to_cpu(rsp->resp_data_len) >= 4) + ch->tsk_mgmt_status = rsp->data[3]; + complete(&ch->tsk_mgmt_done); + } else { + shost_printk(KERN_ERR, target->scsi_host, + "Received tsk mgmt response too late for tag %#llx\n", + rsp->tag); + } spin_unlock_irqrestore(&ch->lock, flags); - - ch->tsk_mgmt_status = -1; - if (be32_to_cpu(rsp->resp_data_len) >= 4) - ch->tsk_mgmt_status = rsp->data[3]; - complete(&ch->tsk_mgmt_done); } else { scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag); if (scmnd && scmnd->host_scribble) { @@ -2516,19 +2521,18 @@ srp_change_queue_depth(struct scsi_device *sdev, int qdepth) } static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun, - u8 func) + u8 func, u8 *status) { struct srp_target_port *target = ch->target; struct srp_rport *rport = target->rport; struct ib_device *dev = target->srp_host->srp_dev->dev; struct srp_iu *iu; struct srp_tsk_mgmt *tsk_mgmt; + int res; if (!ch->connected || target->qp_in_error) return -1; - init_completion(&ch->tsk_mgmt_done); - /* * Lock the rport mutex to avoid that srp_create_ch_ib() is * invoked while a task management function is being sent. @@ -2551,10 +2555,16 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun, tsk_mgmt->opcode = SRP_TSK_MGMT; int_to_scsilun(lun, &tsk_mgmt->lun); - tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT; tsk_mgmt->tsk_mgmt_func = func; tsk_mgmt->task_tag = req_tag; + spin_lock_irq(&ch->lock); + ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT; + tsk_mgmt->tag = ch->tsk_mgmt_tag; + spin_unlock_irq(&ch->lock); + + init_completion(&ch->tsk_mgmt_done); + ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt, DMA_TO_DEVICE); if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) { @@ -2563,13 +2573,15 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun, return -1; } + res = wait_for_completion_timeout(&ch->tsk_mgmt_done, + msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)); + if (res > 0 && status) + *status = ch->tsk_mgmt_status; mutex_unlock(&rport->mutex); - if (!wait_for_completion_timeout(&ch->tsk_mgmt_done, - msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS))) - return -1; + WARN_ON_ONCE(res < 0); - return 0; + return res > 0 ? 0 : -1; } static int srp_abort(struct scsi_cmnd *scmnd) @@ -2595,7 +2607,7 @@ static int srp_abort(struct scsi_cmnd *scmnd) shost_printk(KERN_ERR, target->scsi_host, "Sending SRP abort for tag %#x\n", tag); if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun, - SRP_TSK_ABORT_TASK) == 0) + SRP_TSK_ABORT_TASK, NULL) == 0) ret = SUCCESS; else if (target->rport->state == SRP_RPORT_LOST) ret = FAST_IO_FAIL; @@ -2613,14 +2625,15 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) struct srp_target_port *target = host_to_target(scmnd->device->host); struct srp_rdma_ch *ch; int i; + u8 status; shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n"); ch = &target->ch[0]; if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun, - SRP_TSK_LUN_RESET)) + SRP_TSK_LUN_RESET, &status)) return FAILED; - if (ch->tsk_mgmt_status) + if (status) return FAILED; for (i = 0; i < target->ch_count; i++) { diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 21c69695f9d4..32ed40db3ca2 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -163,6 +163,7 @@ struct srp_rdma_ch { int max_ti_iu_len; int comp_vector; + u64 tsk_mgmt_tag; struct completion tsk_mgmt_done; u8 tsk_mgmt_status; bool connected; From d3381fab77cbca6f9664cf49b3f5dd3171f1f6d3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Feb 2017 06:06:16 +1300 Subject: [PATCH 178/357] fs: Better permission checking for submounts commit 93faccbbfa958a9668d3ab4e30f38dd205cee8d8 upstream. To support unprivileged users mounting filesystems two permission checks have to be performed: a test to see if the user allowed to create a mount in the mount namespace, and a test to see if the user is allowed to access the specified filesystem. The automount case is special in that mounting the original filesystem grants permission to mount the sub-filesystems, to any user who happens to stumble across the their mountpoint and satisfies the ordinary filesystem permission checks. Attempting to handle the automount case by using override_creds almost works. It preserves the idea that permission to mount the original filesystem is permission to mount the sub-filesystem. Unfortunately using override_creds messes up the filesystems ordinary permission checks. Solve this by being explicit that a mount is a submount by introducing vfs_submount, and using it where appropriate. vfs_submount uses a new mount internal mount flags MS_SUBMOUNT, to let sget and friends know that a mount is a submount so they can take appropriate action. sget and sget_userns are modified to not perform any permission checks on submounts. follow_automount is modified to stop using override_creds as that has proven problemantic. do_mount is modified to always remove the new MS_SUBMOUNT flag so that we know userspace will never by able to specify it. autofs4 is modified to stop using current_real_cred that was put in there to handle the previous version of submount permission checking. cifs is modified to pass the mountpoint all of the way down to vfs_submount. debugfs is modified to pass the mountpoint all of the way down to trace_automount by adding a new parameter. To make this change easier a new typedef debugfs_automount_t is introduced to capture the type of the debugfs automount function. Fixes: 069d5ac9ae0d ("autofs: Fix automounts by using current_real_cred()->uid") Fixes: aeaa4a79ff6a ("fs: Call d_automount with the filesystems creds") Reviewed-by: Trond Myklebust Reviewed-by: Seth Forshee Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/afs/mntpt.c | 2 +- fs/autofs4/waitq.c | 4 ++-- fs/cifs/cifs_dfs_ref.c | 7 ++++--- fs/debugfs/inode.c | 8 ++++---- fs/namei.c | 3 --- fs/namespace.c | 17 ++++++++++++++++- fs/nfs/namespace.c | 2 +- fs/nfs/nfs4namespace.c | 2 +- fs/super.c | 13 ++++++++++--- include/linux/debugfs.h | 3 ++- include/linux/mount.h | 3 +++ include/uapi/linux/fs.h | 1 + kernel/trace/trace.c | 4 ++-- 13 files changed, 47 insertions(+), 22 deletions(-) diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 81dd075356b9..d4fb0afc0097 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -202,7 +202,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) /* try and do the mount */ _debug("--- attempting mount %s -o %s ---", devname, options); - mnt = vfs_kern_mount(&afs_fs_type, 0, devname, options); + mnt = vfs_submount(mntpt, &afs_fs_type, devname, options); _debug("--- mount result %p ---", mnt); free_page((unsigned long) devname); diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index e44271dfceb6..5db6c8d745ea 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -431,8 +431,8 @@ int autofs4_wait(struct autofs_sb_info *sbi, memcpy(&wq->name, &qstr, sizeof(struct qstr)); wq->dev = autofs4_get_dev(sbi); wq->ino = autofs4_get_ino(sbi); - wq->uid = current_real_cred()->uid; - wq->gid = current_real_cred()->gid; + wq->uid = current_cred()->uid; + wq->gid = current_cred()->gid; wq->pid = pid; wq->tgid = tgid; wq->status = -EINTR; /* Status return if interrupted */ diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index ec9dbbcca3b9..9156be545b0f 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -245,7 +245,8 @@ compose_mount_options_err: * @fullpath: full path in UNC format * @ref: server's referral */ -static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb, +static struct vfsmount *cifs_dfs_do_refmount(struct dentry *mntpt, + struct cifs_sb_info *cifs_sb, const char *fullpath, const struct dfs_info3_param *ref) { struct vfsmount *mnt; @@ -259,7 +260,7 @@ static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb, if (IS_ERR(mountdata)) return (struct vfsmount *)mountdata; - mnt = vfs_kern_mount(&cifs_fs_type, 0, devname, mountdata); + mnt = vfs_submount(mntpt, &cifs_fs_type, devname, mountdata); kfree(mountdata); kfree(devname); return mnt; @@ -334,7 +335,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) mnt = ERR_PTR(-EINVAL); break; } - mnt = cifs_dfs_do_refmount(cifs_sb, + mnt = cifs_dfs_do_refmount(mntpt, cifs_sb, full_path, referrals + i); cifs_dbg(FYI, "%s: cifs_dfs_do_refmount:%s , mnt:%p\n", __func__, referrals[i].node_name, mnt); diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index f17fcf89e18e..1e30f74a9527 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -187,9 +187,9 @@ static const struct super_operations debugfs_super_operations = { static struct vfsmount *debugfs_automount(struct path *path) { - struct vfsmount *(*f)(void *); - f = (struct vfsmount *(*)(void *))path->dentry->d_fsdata; - return f(d_inode(path->dentry)->i_private); + debugfs_automount_t f; + f = (debugfs_automount_t)path->dentry->d_fsdata; + return f(path->dentry, d_inode(path->dentry)->i_private); } static const struct dentry_operations debugfs_dops = { @@ -504,7 +504,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_dir); */ struct dentry *debugfs_create_automount(const char *name, struct dentry *parent, - struct vfsmount *(*f)(void *), + debugfs_automount_t f, void *data) { struct dentry *dentry = start_creating(name, parent); diff --git a/fs/namei.c b/fs/namei.c index 5b4eed221530..d5e5140c1045 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1100,7 +1100,6 @@ static int follow_automount(struct path *path, struct nameidata *nd, bool *need_mntput) { struct vfsmount *mnt; - const struct cred *old_cred; int err; if (!path->dentry->d_op || !path->dentry->d_op->d_automount) @@ -1129,9 +1128,7 @@ static int follow_automount(struct path *path, struct nameidata *nd, if (nd->total_link_count >= 40) return -ELOOP; - old_cred = override_creds(&init_cred); mnt = path->dentry->d_op->d_automount(path); - revert_creds(old_cred); if (IS_ERR(mnt)) { /* * The filesystem is allowed to return -EISDIR here to indicate diff --git a/fs/namespace.c b/fs/namespace.c index ea751263fefa..5e35057f07ac 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -995,6 +995,21 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void } EXPORT_SYMBOL_GPL(vfs_kern_mount); +struct vfsmount * +vfs_submount(const struct dentry *mountpoint, struct file_system_type *type, + const char *name, void *data) +{ + /* Until it is worked out how to pass the user namespace + * through from the parent mount to the submount don't support + * unprivileged mounts with submounts. + */ + if (mountpoint->d_sb->s_user_ns != &init_user_ns) + return ERR_PTR(-EPERM); + + return vfs_kern_mount(type, MS_SUBMOUNT, name, data); +} +EXPORT_SYMBOL_GPL(vfs_submount); + static struct mount *clone_mnt(struct mount *old, struct dentry *root, int flag) { @@ -2779,7 +2794,7 @@ long do_mount(const char *dev_name, const char __user *dir_name, flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN | MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | - MS_STRICTATIME | MS_NOREMOTELOCK); + MS_STRICTATIME | MS_NOREMOTELOCK | MS_SUBMOUNT); if (flags & MS_REMOUNT) retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags, diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 5551e8ef67fd..e49d831c4e85 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -226,7 +226,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, const char *devname, struct nfs_clone_mount *mountdata) { - return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata); + return vfs_submount(mountdata->dentry, &nfs_xdev_fs_type, devname, mountdata); } /** diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index d21104912676..d8b040bd9814 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -279,7 +279,7 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, mountdata->hostname, mountdata->mnt_path); - mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, mountdata); + mnt = vfs_submount(mountdata->dentry, &nfs4_referral_fs_type, page, mountdata); if (!IS_ERR(mnt)) break; } diff --git a/fs/super.c b/fs/super.c index c183835566c1..1058bf3e8724 100644 --- a/fs/super.c +++ b/fs/super.c @@ -470,7 +470,7 @@ struct super_block *sget_userns(struct file_system_type *type, struct super_block *old; int err; - if (!(flags & MS_KERNMOUNT) && + if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) && !(type->fs_flags & FS_USERNS_MOUNT) && !capable(CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); @@ -500,7 +500,7 @@ retry: } if (!s) { spin_unlock(&sb_lock); - s = alloc_super(type, flags, user_ns); + s = alloc_super(type, (flags & ~MS_SUBMOUNT), user_ns); if (!s) return ERR_PTR(-ENOMEM); goto retry; @@ -541,8 +541,15 @@ struct super_block *sget(struct file_system_type *type, { struct user_namespace *user_ns = current_user_ns(); + /* We don't yet pass the user namespace of the parent + * mount through to here so always use &init_user_ns + * until that changes. + */ + if (flags & MS_SUBMOUNT) + user_ns = &init_user_ns; + /* Ensure the requestor has permissions over the target filesystem */ - if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN)) + if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) && !ns_capable(user_ns, CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); return sget_userns(type, test, set, flags, user_ns, data); diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 1b413a9aab81..b20a0945b550 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -96,9 +96,10 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent); struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, const char *dest); +typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *); struct dentry *debugfs_create_automount(const char *name, struct dentry *parent, - struct vfsmount *(*f)(void *), + debugfs_automount_t f, void *data); void debugfs_remove(struct dentry *dentry); diff --git a/include/linux/mount.h b/include/linux/mount.h index 1172cce949a4..e0f3a82eee6d 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -90,6 +90,9 @@ struct file_system_type; extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data); +extern struct vfsmount *vfs_submount(const struct dentry *mountpoint, + struct file_system_type *type, + const char *name, void *data); extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list); extern void mark_mounts_for_expiry(struct list_head *mounts); diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index acb2b6152ba0..474995568f35 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -132,6 +132,7 @@ struct inodes_stat_t { #define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ /* These sb flags are internal to the kernel */ +#define MS_SUBMOUNT (1<<26) #define MS_NOREMOTELOCK (1<<27) #define MS_NOSEC (1<<28) #define MS_BORN (1<<29) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8696ce6bf2f6..90b66ed6f0e2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7241,7 +7241,7 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) ftrace_init_tracefs(tr, d_tracer); } -static struct vfsmount *trace_automount(void *ingore) +static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore) { struct vfsmount *mnt; struct file_system_type *type; @@ -7254,7 +7254,7 @@ static struct vfsmount *trace_automount(void *ingore) type = get_fs_type("tracefs"); if (!type) return NULL; - mnt = vfs_kern_mount(type, 0, "tracefs", NULL); + mnt = vfs_submount(mntpt, type, "tracefs", NULL); put_filesystem(type); if (IS_ERR(mnt)) return NULL; From b2b0f6ffd3f97953044e909ca311d2a580ae8575 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Feb 2017 16:43:36 +0100 Subject: [PATCH 179/357] orangefs: Use RCU for destroy_inode commit 0695d7dc1d9f19b82ec2cae24856bddce278cfe6 upstream. freeing of inodes must be RCU-delayed on all filesystems Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/orangefs/super.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index c48859f16e7b..67c24351a67f 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -115,6 +115,13 @@ static struct inode *orangefs_alloc_inode(struct super_block *sb) return &orangefs_inode->vfs_inode; } +static void orangefs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); + kmem_cache_free(orangefs_inode_cache, orangefs_inode); +} + static void orangefs_destroy_inode(struct inode *inode) { struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); @@ -123,7 +130,7 @@ static void orangefs_destroy_inode(struct inode *inode) "%s: deallocated %p destroying inode %pU\n", __func__, orangefs_inode, get_khandle_from_ino(inode)); - kmem_cache_free(orangefs_inode_cache, orangefs_inode); + call_rcu(&inode->i_rcu, orangefs_i_callback); } /* From 1f2ca141ec53a9c43ea0852f013907d19c435986 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 28 Nov 2016 16:17:56 +0100 Subject: [PATCH 180/357] memory/atmel-ebi: Fix ns <-> cycles conversions commit ee194289502a6901cc77dc9a893bf2afd351ac5e upstream. at91sam9_ebi_get_config() is incorrectly converting timings in clock cycles into timings in nanoseconds by multiplying the cycle values by the clk rate instead of the clk period. at91sam9_ebi_xslate_config() has the same problem for the tdf_ns -> tdf_cycles conversion. Signed-off-by: Boris Brezillon Reported-by: Chris Leahy Fixes: 6a4ec4cd0888 ("memory: add Atmel EBI (External Bus Interface) driver") Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/memory/atmel-ebi.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/memory/atmel-ebi.c b/drivers/memory/atmel-ebi.c index b5ed3bd082b5..e9ebc4f31d16 100644 --- a/drivers/memory/atmel-ebi.c +++ b/drivers/memory/atmel-ebi.c @@ -93,7 +93,7 @@ static void at91sam9_ebi_get_config(struct at91_ebi_dev *ebid, struct at91_ebi_dev_config *conf) { struct at91sam9_smc_generic_fields *fields = &ebid->ebi->sam9; - unsigned int clk_rate = clk_get_rate(ebid->ebi->clk); + unsigned int clk_period = NSEC_PER_SEC / clk_get_rate(ebid->ebi->clk); struct at91sam9_ebi_dev_config *config = &conf->sam9; struct at91sam9_smc_timings *timings = &config->timings; unsigned int val; @@ -102,43 +102,43 @@ static void at91sam9_ebi_get_config(struct at91_ebi_dev *ebid, config->mode = val & ~AT91_SMC_TDF; val = (val & AT91_SMC_TDF) >> 16; - timings->tdf_ns = clk_rate * val; + timings->tdf_ns = clk_period * val; regmap_fields_read(fields->setup, conf->cs, &val); timings->ncs_rd_setup_ns = (val >> 24) & 0x1f; timings->ncs_rd_setup_ns += ((val >> 29) & 0x1) * 128; - timings->ncs_rd_setup_ns *= clk_rate; + timings->ncs_rd_setup_ns *= clk_period; timings->nrd_setup_ns = (val >> 16) & 0x1f; timings->nrd_setup_ns += ((val >> 21) & 0x1) * 128; - timings->nrd_setup_ns *= clk_rate; + timings->nrd_setup_ns *= clk_period; timings->ncs_wr_setup_ns = (val >> 8) & 0x1f; timings->ncs_wr_setup_ns += ((val >> 13) & 0x1) * 128; - timings->ncs_wr_setup_ns *= clk_rate; + timings->ncs_wr_setup_ns *= clk_period; timings->nwe_setup_ns = val & 0x1f; timings->nwe_setup_ns += ((val >> 5) & 0x1) * 128; - timings->nwe_setup_ns *= clk_rate; + timings->nwe_setup_ns *= clk_period; regmap_fields_read(fields->pulse, conf->cs, &val); timings->ncs_rd_pulse_ns = (val >> 24) & 0x3f; timings->ncs_rd_pulse_ns += ((val >> 30) & 0x1) * 256; - timings->ncs_rd_pulse_ns *= clk_rate; + timings->ncs_rd_pulse_ns *= clk_period; timings->nrd_pulse_ns = (val >> 16) & 0x3f; timings->nrd_pulse_ns += ((val >> 22) & 0x1) * 256; - timings->nrd_pulse_ns *= clk_rate; + timings->nrd_pulse_ns *= clk_period; timings->ncs_wr_pulse_ns = (val >> 8) & 0x3f; timings->ncs_wr_pulse_ns += ((val >> 14) & 0x1) * 256; - timings->ncs_wr_pulse_ns *= clk_rate; + timings->ncs_wr_pulse_ns *= clk_period; timings->nwe_pulse_ns = val & 0x3f; timings->nwe_pulse_ns += ((val >> 6) & 0x1) * 256; - timings->nwe_pulse_ns *= clk_rate; + timings->nwe_pulse_ns *= clk_period; regmap_fields_read(fields->cycle, conf->cs, &val); timings->nrd_cycle_ns = (val >> 16) & 0x7f; timings->nrd_cycle_ns += ((val >> 23) & 0x3) * 256; - timings->nrd_cycle_ns *= clk_rate; + timings->nrd_cycle_ns *= clk_period; timings->nwe_cycle_ns = val & 0x7f; timings->nwe_cycle_ns += ((val >> 7) & 0x3) * 256; - timings->nwe_cycle_ns *= clk_rate; + timings->nwe_cycle_ns *= clk_period; } static int at91_xlate_timing(struct device_node *np, const char *prop, @@ -334,6 +334,7 @@ static int at91sam9_ebi_apply_config(struct at91_ebi_dev *ebid, struct at91_ebi_dev_config *conf) { unsigned int clk_rate = clk_get_rate(ebid->ebi->clk); + unsigned int clk_period = NSEC_PER_SEC / clk_rate; struct at91sam9_ebi_dev_config *config = &conf->sam9; struct at91sam9_smc_timings *timings = &config->timings; struct at91sam9_smc_generic_fields *fields = &ebid->ebi->sam9; @@ -376,7 +377,7 @@ static int at91sam9_ebi_apply_config(struct at91_ebi_dev *ebid, val |= AT91SAM9_SMC_NWECYCLE(coded_val); regmap_fields_write(fields->cycle, conf->cs, val); - val = DIV_ROUND_UP(timings->tdf_ns, clk_rate); + val = DIV_ROUND_UP(timings->tdf_ns, clk_period); if (val > AT91_SMC_TDF_MAX) val = AT91_SMC_TDF_MAX; regmap_fields_write(fields->mode, conf->cs, From ff61e0123b7d8fe13ae8b6f96e5c3abfc11357fc Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 7 Feb 2017 12:05:25 -0500 Subject: [PATCH 181/357] ktest: Fix child exit code processing commit 32677207dcc5e594254b7fb4fb2352b1755b1d5b upstream. The child_exit errno needs to be shifted by 8 bits to compare against the return values for the bisect variables. Fixes: c5dacb88f0a64 ("ktest: Allow overriding bisect test results") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- tools/testing/ktest/ktest.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index d08e214ec6e7..223d88e25e05 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -2629,7 +2629,7 @@ sub do_run_test { } waitpid $child_pid, 0; - $child_exit = $?; + $child_exit = $? >> 8; my $end_time = time; $test_time = $end_time - $start_time; From 5da90d0018a7f9ed23698663d046146426991ffa Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 14 Feb 2017 10:09:40 -0500 Subject: [PATCH 182/357] ceph: remove req from unsafe list when unregistering it commit df963ea8a082d31521a120e8e31a29ad8a1dc215 upstream. There's no reason a request should ever be on a s_unsafe list but not in the request tree. Link: http://tracker.ceph.com/issues/18474 Signed-off-by: Jeff Layton Reviewed-by: Yan, Zheng Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/mds_client.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 6a26c7bd1286..e3e1a80b351e 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -628,6 +628,9 @@ static void __unregister_request(struct ceph_mds_client *mdsc, { dout("__unregister_request %p tid %lld\n", req, req->r_tid); + /* Never leave an unregistered request on an unsafe list! */ + list_del_init(&req->r_unsafe_item); + if (req->r_tid == mdsc->oldest_tid) { struct rb_node *p = rb_next(&req->r_node); mdsc->oldest_tid = 0; @@ -1036,7 +1039,6 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc, while (!list_empty(&session->s_unsafe)) { req = list_first_entry(&session->s_unsafe, struct ceph_mds_request, r_unsafe_item); - list_del_init(&req->r_unsafe_item); pr_warn_ratelimited(" dropping unsafe request %llu\n", req->r_tid); __unregister_request(mdsc, req); @@ -2423,7 +2425,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) * useful we could do with a revised return value. */ dout("got safe reply %llu, mds%d\n", tid, mds); - list_del_init(&req->r_unsafe_item); /* last unsafe request during umount? */ if (mdsc->stopping && !__get_oldest_req(mdsc)) From 4ee3508f7abece538c4a99c71d81e94f8df44d97 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 16 Feb 2017 10:22:32 +1100 Subject: [PATCH 183/357] pci/hotplug/pnv-php: Remove WARN_ON() in pnv_php_put_slot() commit 36c7c9da40c408a71e5e6bfe12e57dcf549a296d upstream. The WARN_ON() causes unnecessary backtrace when putting the parent slot, which is likely to be NULL. WARNING: CPU: 2 PID: 1071 at drivers/pci/hotplug/pnv_php.c:85 \ pnv_php_release+0xcc/0x150 [pnv_php] : Call Trace: [c0000003bc007c10] [d00000000ad613c4] pnv_php_release+0x144/0x150 [pnv_php] [c0000003bc007c40] [c0000000006641d8] pci_hp_deregister+0x238/0x330 [c0000003bc007cd0] [d00000000ad61440] pnv_php_unregister_one+0x70/0xa0 [pnv_php] [c0000003bc007d10] [d00000000ad614c0] pnv_php_unregister+0x50/0x80 [pnv_php] [c0000003bc007d40] [d00000000ad61e84] pnv_php_exit+0x50/0xcb4 [pnv_php] [c0000003bc007d70] [c00000000019499c] SyS_delete_module+0x1fc/0x2a0 [c0000003bc007e30] [c00000000000b184] system_call+0x38/0xe0 Fixes: 66725152fb9f ("PCI/hotplug: PowerPC PowerNV PCI hotplug driver") Signed-off-by: Gavin Shan Reviewed-by: Andrew Donnellan Tested-by: Vaibhav Jain Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/pci/hotplug/pnv_php.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index acb2be0c8c2c..f96588a8f29b 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -82,7 +82,7 @@ static void pnv_php_free_slot(struct kref *kref) static inline void pnv_php_put_slot(struct pnv_php_slot *php_slot) { - if (WARN_ON(!php_slot)) + if (!php_slot) return; kref_put(&php_slot->kref, pnv_php_free_slot); From 54eff720c99f2aeb096eb3bb98b4475bd1a0ebd9 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 16 Feb 2017 10:22:33 +1100 Subject: [PATCH 184/357] pci/hotplug/pnv-php: Disable surprise hotplug capability on conflicts commit 303529d6ef1293513c2c73c9ab86489eebb37d08 upstream. The root port or PCIe switch downstream port might have been associated with driver other than pnv-php. The MSI or MSIx might also have been enabled by that driver (e.g. pcieport_drv). Attempt to enable MSI incurs below backtrace: PowerPC PowerNV PCI Hotplug Driver version: 0.1 ------------[ cut here ]------------ WARNING: CPU: 19 PID: 1004 at drivers/pci/msi.c:1071 \ __pci_enable_msi_range+0x84/0x4e0 NIP [c000000000665c34] __pci_enable_msi_range+0x84/0x4e0 LR [c000000000665c24] __pci_enable_msi_range+0x74/0x4e0 Call Trace: [c000000384d67600] [c000000000665c24] __pci_enable_msi_range+0x74/0x4e0 [c000000384d676e0] [d00000000aa31b04] pnv_php_register+0x564/0x5a0 [pnv_php] [c000000384d677c0] [d00000000aa31658] pnv_php_register+0xb8/0x5a0 [pnv_php] [c000000384d678a0] [d00000000aa31658] pnv_php_register+0xb8/0x5a0 [pnv_php] [c000000384d67980] [d00000000aa31dfc] pnv_php_init+0x60/0x98 [pnv_php] [c000000384d679f0] [c00000000000cfdc] do_one_initcall+0x6c/0x1d0 [c000000384d67ab0] [c000000000b92354] do_init_module+0x94/0x254 [c000000384d67b40] [c00000000019719c] load_module+0x258c/0x2c60 [c000000384d67d30] [c000000000197bb0] SyS_finit_module+0xf0/0x170 [c000000384d67e30] [c00000000000b184] system_call+0x38/0xe0 This fixes the issue by skipping enabling the surprise hotplug capability if the MSI or MSIx on the PCI slot's upstream port has been enabled by other driver. Fixes: 360aebd85a4c ("drivers/pci/hotplug: Support surprise hotplug in powernv driver") Signed-off-by: Gavin Shan Reviewed-by: Andrew Donnellan Tested-by: Vaibhav Jain Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/pci/hotplug/pnv_php.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index f96588a8f29b..99b05338b4bc 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -799,6 +799,14 @@ static void pnv_php_enable_irq(struct pnv_php_slot *php_slot) struct pci_dev *pdev = php_slot->pdev; int irq, ret; + /* + * The MSI/MSIx interrupt might have been occupied by other + * drivers. Don't populate the surprise hotplug capability + * in that case. + */ + if (pci_dev_msi_enabled(pdev)) + return; + ret = pci_enable_device(pdev); if (ret) { dev_warn(&pdev->dev, "Error %d enabling device\n", ret); From 17ea11d553220281ab037799642ecd45246c3a65 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 22 Feb 2017 22:06:32 -0800 Subject: [PATCH 185/357] target: Fix NULL dereference during LUN lookup + active I/O shutdown commit bd4e2d2907fa23a11d46217064ecf80470ddae10 upstream. When transport_clear_lun_ref() is shutting down a se_lun via configfs with new I/O in-flight, it's possible to trigger a NULL pointer dereference in transport_lookup_cmd_lun() due to the fact percpu_ref_get() doesn't do any __PERCPU_REF_DEAD checking before incrementing lun->lun_ref.count after lun->lun_ref has switched to atomic_t mode. This results in a NULL pointer dereference as LUN shutdown code in core_tpg_remove_lun() continues running after the existing ->release() -> core_tpg_lun_ref_release() callback completes, and clears the RCU protected se_lun->lun_se_dev pointer. During the OOPs, the state of lun->lun_ref in the process which triggered the NULL pointer dereference looks like the following on v4.1.y stable code: struct se_lun { lun_link_magic = 4294932337, lun_status = TRANSPORT_LUN_STATUS_FREE, ..... lun_se_dev = 0x0, lun_sep = 0x0, ..... lun_ref = { count = { counter = 1 }, percpu_count_ptr = 3, release = 0xffffffffa02fa1e0 , confirm_switch = 0x0, force_atomic = false, rcu = { next = 0xffff88154fa1a5d0, func = 0xffffffff8137c4c0 } } } To address this bug, use percpu_ref_tryget_live() to ensure once __PERCPU_REF_DEAD is visable on all CPUs and ->lun_ref has switched to atomic_t, all new I/Os will fail to obtain a new lun->lun_ref reference. Also use an explicit percpu_ref_kill_and_confirm() callback to block on ->lun_ref_comp to allow the first stage and associated RCU grace period to complete, and then block on ->lun_ref_shutdown waiting for the final percpu_ref_put() to drop the last reference via transport_lun_remove_cmd() before continuing with core_tpg_remove_lun() shutdown. Reported-by: Rob Millner Tested-by: Rob Millner Cc: Rob Millner Tested-by: Vaibhav Tandon Cc: Vaibhav Tandon Tested-by: Bryant G. Ly Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_device.c | 10 +++++++-- drivers/target/target_core_tpg.c | 3 ++- drivers/target/target_core_transport.c | 31 +++++++++++++++++++++++++- include/target/target_core_base.h | 1 + 4 files changed, 41 insertions(+), 4 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index ea9617c7b403..cc38a3509f78 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -77,12 +77,16 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u64 unpacked_lun) &deve->read_bytes); se_lun = rcu_dereference(deve->se_lun); + + if (!percpu_ref_tryget_live(&se_lun->lun_ref)) { + se_lun = NULL; + goto out_unlock; + } + se_cmd->se_lun = rcu_dereference(deve->se_lun); se_cmd->pr_res_key = deve->pr_res_key; se_cmd->orig_fe_lun = unpacked_lun; se_cmd->se_cmd_flags |= SCF_SE_LUN_CMD; - - percpu_ref_get(&se_lun->lun_ref); se_cmd->lun_ref_active = true; if ((se_cmd->data_direction == DMA_TO_DEVICE) && @@ -96,6 +100,7 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u64 unpacked_lun) goto ref_dev; } } +out_unlock: rcu_read_unlock(); if (!se_lun) { @@ -815,6 +820,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) xcopy_lun = &dev->xcopy_lun; rcu_assign_pointer(xcopy_lun->lun_se_dev, dev); init_completion(&xcopy_lun->lun_ref_comp); + init_completion(&xcopy_lun->lun_shutdown_comp); INIT_LIST_HEAD(&xcopy_lun->lun_deve_list); INIT_LIST_HEAD(&xcopy_lun->lun_dev_link); mutex_init(&xcopy_lun->lun_tg_pt_md_mutex); diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index d99752c6cd60..2744251178ad 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -445,7 +445,7 @@ static void core_tpg_lun_ref_release(struct percpu_ref *ref) { struct se_lun *lun = container_of(ref, struct se_lun, lun_ref); - complete(&lun->lun_ref_comp); + complete(&lun->lun_shutdown_comp); } int core_tpg_register( @@ -571,6 +571,7 @@ struct se_lun *core_tpg_alloc_lun( lun->lun_link_magic = SE_LUN_LINK_MAGIC; atomic_set(&lun->lun_acl_count, 0); init_completion(&lun->lun_ref_comp); + init_completion(&lun->lun_shutdown_comp); INIT_LIST_HEAD(&lun->lun_deve_list); INIT_LIST_HEAD(&lun->lun_dev_link); atomic_set(&lun->lun_tg_pt_secondary_offline, 0); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 767d1eb6e035..cae4dea6464e 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2702,10 +2702,39 @@ void target_wait_for_sess_cmds(struct se_session *se_sess) } EXPORT_SYMBOL(target_wait_for_sess_cmds); +static void target_lun_confirm(struct percpu_ref *ref) +{ + struct se_lun *lun = container_of(ref, struct se_lun, lun_ref); + + complete(&lun->lun_ref_comp); +} + void transport_clear_lun_ref(struct se_lun *lun) { - percpu_ref_kill(&lun->lun_ref); + /* + * Mark the percpu-ref as DEAD, switch to atomic_t mode, drop + * the initial reference and schedule confirm kill to be + * executed after one full RCU grace period has completed. + */ + percpu_ref_kill_and_confirm(&lun->lun_ref, target_lun_confirm); + /* + * The first completion waits for percpu_ref_switch_to_atomic_rcu() + * to call target_lun_confirm after lun->lun_ref has been marked + * as __PERCPU_REF_DEAD on all CPUs, and switches to atomic_t + * mode so that percpu_ref_tryget_live() lookup of lun->lun_ref + * fails for all new incoming I/O. + */ wait_for_completion(&lun->lun_ref_comp); + /* + * The second completion waits for percpu_ref_put_many() to + * invoke ->release() after lun->lun_ref has switched to + * atomic_t mode, and lun->lun_ref.count has reached zero. + * + * At this point all target-core lun->lun_ref references have + * been dropped via transport_lun_remove_cmd(), and it's safe + * to proceed with the remaining LUN shutdown. + */ + wait_for_completion(&lun->lun_shutdown_comp); } static bool diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 48bc1ac1da43..6233e8fd95b5 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -732,6 +732,7 @@ struct se_lun { struct config_group lun_group; struct se_port_stat_grps port_stat_grps; struct completion lun_ref_comp; + struct completion lun_shutdown_comp; struct percpu_ref lun_ref; struct list_head lun_dev_link; struct hlist_node link; From 1afe7b4ac3df20d2b9afd90d475fa466027a6595 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 11 Jan 2017 11:50:06 +1100 Subject: [PATCH 186/357] drivers/pci/hotplug: Handle presence detection change properly commit d7d55536c6cd1f80295b6d7483ad0587b148bde4 upstream. The surprise hotplug is driven by interrupt in PowerNV PCI hotplug driver. In the interrupt handler, pnv_php_interrupt(), we bail when pnv_pci_get_presence_state() returns zero wrongly. It causes the presence change event is always ignored incorrectly. This fixes the issue by bailing on error (non-zero value) returned from pnv_pci_get_presence_state(). Fixes: 360aebd85a4 ("drivers/pci/hotplug: Support surprise hotplug in powernv driver") Reported-by: Hank Chang Signed-off-by: Gavin Shan Tested-by: Willie Liauw Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/pci/hotplug/pnv_php.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 99b05338b4bc..e1c29909e7d3 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -713,8 +713,12 @@ static irqreturn_t pnv_php_interrupt(int irq, void *data) added = !!(lsts & PCI_EXP_LNKSTA_DLLLA); } else if (sts & PCI_EXP_SLTSTA_PDC) { ret = pnv_pci_get_presence_state(php_slot->id, &presence); - if (!ret) + if (ret) { + dev_warn(&pdev->dev, "PCI slot [%s] error %d getting presence (0x%04x), to retry the operation.\n", + php_slot->name, ret, sts); return IRQ_HANDLED; + } + added = !!(presence == OPAL_PCI_SLOT_PRESENT); } else { return IRQ_NONE; From 4f77c55c3ac232c4dfa308ce147d943b594a17ca Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 11 Jan 2017 11:50:07 +1100 Subject: [PATCH 187/357] drivers/pci/hotplug: Fix initial state for empty slot commit d0c424971f70501ec0a0364117b9934db039c9cc upstream. In PowerNV PCI hotplug driver, the initial PCI slot's state is set to PNV_PHP_STATE_POPULATED if no PCI devices are connected to the slot. The PCI devices that are hot added to the slot won't be probed and populated because of the check in pnv_php_enable(): /* Check if the slot has been configured */ if (php_slot->state != PNV_PHP_STATE_REGISTERED) return 0; This fixes the issue by leaving the slot in PNV_PHP_STATE_REGISTERED state initially if nothing is connected to the slot. Fixes: 360aebd85a4 ("drivers/pci/hotplug: Support surprise hotplug in powernv driver") Reported-by: Hank Chang Signed-off-by: Gavin Shan Tested-by: Willie Liauw Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/pci/hotplug/pnv_php.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index e1c29909e7d3..e96973b95e7a 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -436,9 +436,21 @@ static int pnv_php_enable(struct pnv_php_slot *php_slot, bool rescan) if (ret) return ret; - /* Proceed if there have nothing behind the slot */ - if (presence == OPAL_PCI_SLOT_EMPTY) + /* + * Proceed if there have nothing behind the slot. However, + * we should leave the slot in registered state at the + * beginning. Otherwise, the PCI devices inserted afterwards + * won't be probed and populated. + */ + if (presence == OPAL_PCI_SLOT_EMPTY) { + if (!php_slot->power_state_check) { + php_slot->power_state_check = true; + + return 0; + } + goto scan; + } /* * If the power supply to the slot is off, we can't detect From 1f2f16c7b7e307ce0ef1764e6142e1dd2007e40e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 11 Feb 2017 10:37:38 -0500 Subject: [PATCH 188/357] nlm: Ensure callback code also checks that the files match commit 251af29c320d86071664f02c76f0d063a19fefdf upstream. It is not sufficient to just check that the lock pids match when granting a callback, we also need to ensure that we're granting the callback on the right file. Reported-by: Pankaj Singh Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- include/linux/lockd/lockd.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index c15373894a42..b37dee3acaba 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -355,7 +355,8 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp) static inline int nlm_compare_locks(const struct file_lock *fl1, const struct file_lock *fl2) { - return fl1->fl_pid == fl2->fl_pid + return file_inode(fl1->fl_file) == file_inode(fl2->fl_file) + && fl1->fl_pid == fl2->fl_pid && fl1->fl_owner == fl2->fl_owner && fl1->fl_start == fl2->fl_start && fl1->fl_end == fl2->fl_end From 019c088dc403448aa29e8650c551baa1e5e29e0f Mon Sep 17 00:00:00 2001 From: Clemens Gruber Date: Tue, 13 Dec 2016 16:52:50 +0100 Subject: [PATCH 189/357] pwm: pca9685: Fix period change with same duty cycle commit 8d254a340efb12b40c4c1ff25a48a4f48f7bbd6b upstream. When first implementing support for changing the output frequency, an optimization was added to continue the PWM after changing the prescaler without having to reprogram the ON and OFF registers for the duty cycle, in case the duty cycle stayed the same. This was flawed, because we compared the absolute value of the duty cycle in nanoseconds instead of the ratio to the period. Fix the problem by removing the shortcut. Fixes: 01ec8472009c9 ("pwm-pca9685: Support changing the output frequency") Signed-off-by: Clemens Gruber Reviewed-by: Mika Westerberg Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/pwm/pwm-pca9685.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c index 117fccf7934a..01a6a83f625d 100644 --- a/drivers/pwm/pwm-pca9685.c +++ b/drivers/pwm/pwm-pca9685.c @@ -65,7 +65,6 @@ #define PCA9685_MAXCHAN 0x10 #define LED_FULL (1 << 4) -#define MODE1_RESTART (1 << 7) #define MODE1_SLEEP (1 << 4) #define MODE2_INVRT (1 << 4) #define MODE2_OUTDRV (1 << 2) @@ -117,16 +116,6 @@ static int pca9685_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, udelay(500); pca->period_ns = period_ns; - - /* - * If the duty cycle did not change, restart PWM with - * the same duty cycle to period ratio and return. - */ - if (duty_ns == pca->duty_ns) { - regmap_update_bits(pca->regmap, PCA9685_MODE1, - MODE1_RESTART, 0x1); - return 0; - } } else { dev_err(chip->dev, "prescaler not set: period out of bounds!\n"); From a7b9c9ddb6f457bab2fc8527ffccf16eefba4af4 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 3 Jan 2017 09:37:34 -0800 Subject: [PATCH 190/357] xtensa: move parse_tag_fdt out of #ifdef CONFIG_BLK_DEV_INITRD commit 4ab18701c66552944188dbcd0ce0012729baab84 upstream. FDT tag parsing is not related to whether BLK_DEV_INITRD is configured or not, move it out of the corresponding #ifdef/#endif block. This fixes passing external FDT to the kernel configured w/o BLK_DEV_INITRD support. Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 32cdc2c52e98..a45d32abea26 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -133,6 +133,8 @@ static int __init parse_tag_initrd(const bp_tag_t* tag) __tagtable(BP_TAG_INITRD, parse_tag_initrd); +#endif /* CONFIG_BLK_DEV_INITRD */ + #ifdef CONFIG_OF static int __init parse_tag_fdt(const bp_tag_t *tag) @@ -145,8 +147,6 @@ __tagtable(BP_TAG_FDT, parse_tag_fdt); #endif /* CONFIG_OF */ -#endif /* CONFIG_BLK_DEV_INITRD */ - static int __init parse_tag_cmdline(const bp_tag_t* tag) { strlcpy(command_line, (char *)(tag->data), COMMAND_LINE_SIZE); From 9ad1571da2c0139db952d8df5a9e4d67a752948b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 28 Feb 2017 18:32:48 -0800 Subject: [PATCH 191/357] nfit, libnvdimm: fix interleave set cookie calculation commit 86ef58a4e35e8fa66afb5898cf6dec6a3bb29f67 upstream. The interleave-set cookie is a sum that sanity checks the composition of an interleave set has not changed from when the namespace was initially created. The checksum is calculated by sorting the DIMMs by their location in the interleave-set. The comparison for the sort must be 64-bit wide, not byte-by-byte as performed by memcmp() in the broken case. Fix the implementation to accept correct cookie values in addition to the Linux "memcmp" order cookies, but only allow correct cookies to be generated going forward. It does mean that namespaces created by third-party-tooling, or created by newer kernels with this fix, will not validate on older kernels. However, there are a couple mitigating conditions: 1/ platforms with namespace-label capable NVDIMMs are not widely available. 2/ interleave-sets with a single-dimm are by definition not affected (nothing to sort). This covers the QEMU-KVM NVDIMM emulation case. The cookie stored in the namespace label will be fixed by any write the namespace label, the most straightforward way to achieve this is to write to the "alt_name" attribute of a namespace in sysfs. Fixes: eaf961536e16 ("libnvdimm, nfit: add interleave-set state-tracking infrastructure") Reported-by: Nicholas Moulin Tested-by: Nicholas Moulin Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/nfit/core.c | 16 +++++++++++++++- drivers/nvdimm/namespace_devs.c | 18 ++++++++++++++---- drivers/nvdimm/nd.h | 1 + drivers/nvdimm/region_devs.c | 9 +++++++++ include/linux/libnvdimm.h | 2 ++ 5 files changed, 41 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 6eb6733a7a5c..d1664df001f8 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1603,7 +1603,7 @@ static size_t sizeof_nfit_set_info(int num_mappings) + num_mappings * sizeof(struct nfit_set_info_map); } -static int cmp_map(const void *m0, const void *m1) +static int cmp_map_compat(const void *m0, const void *m1) { const struct nfit_set_info_map *map0 = m0; const struct nfit_set_info_map *map1 = m1; @@ -1612,6 +1612,14 @@ static int cmp_map(const void *m0, const void *m1) sizeof(u64)); } +static int cmp_map(const void *m0, const void *m1) +{ + const struct nfit_set_info_map *map0 = m0; + const struct nfit_set_info_map *map1 = m1; + + return map0->region_offset - map1->region_offset; +} + /* Retrieve the nth entry referencing this spa */ static struct acpi_nfit_memory_map *memdev_from_spa( struct acpi_nfit_desc *acpi_desc, u16 range_index, int n) @@ -1667,6 +1675,12 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc, sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map), cmp_map, NULL); nd_set->cookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0); + + /* support namespaces created with the wrong sort order */ + sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map), + cmp_map_compat, NULL); + nd_set->altcookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0); + ndr_desc->nd_set = nd_set; devm_kfree(dev, info); diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index aefca644219b..a38ae34b74e4 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -1700,6 +1700,7 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id) struct device *create_namespace_pmem(struct nd_region *nd_region, struct nd_namespace_label *nd_label) { + u64 altcookie = nd_region_interleave_set_altcookie(nd_region); u64 cookie = nd_region_interleave_set_cookie(nd_region); struct nd_label_ent *label_ent; struct nd_namespace_pmem *nspm; @@ -1718,7 +1719,11 @@ struct device *create_namespace_pmem(struct nd_region *nd_region, if (__le64_to_cpu(nd_label->isetcookie) != cookie) { dev_dbg(&nd_region->dev, "invalid cookie in label: %pUb\n", nd_label->uuid); - return ERR_PTR(-EAGAIN); + if (__le64_to_cpu(nd_label->isetcookie) != altcookie) + return ERR_PTR(-EAGAIN); + + dev_dbg(&nd_region->dev, "valid altcookie in label: %pUb\n", + nd_label->uuid); } nspm = kzalloc(sizeof(*nspm), GFP_KERNEL); @@ -1733,9 +1738,14 @@ struct device *create_namespace_pmem(struct nd_region *nd_region, res->name = dev_name(&nd_region->dev); res->flags = IORESOURCE_MEM; - for (i = 0; i < nd_region->ndr_mappings; i++) - if (!has_uuid_at_pos(nd_region, nd_label->uuid, cookie, i)) - break; + for (i = 0; i < nd_region->ndr_mappings; i++) { + if (has_uuid_at_pos(nd_region, nd_label->uuid, cookie, i)) + continue; + if (has_uuid_at_pos(nd_region, nd_label->uuid, altcookie, i)) + continue; + break; + } + if (i < nd_region->ndr_mappings) { struct nvdimm_drvdata *ndd = to_ndd(&nd_region->mapping[i]); diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index d3b2fca8deec..d869236b474f 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -327,6 +327,7 @@ struct nd_region *to_nd_region(struct device *dev); int nd_region_to_nstype(struct nd_region *nd_region); int nd_region_register_namespaces(struct nd_region *nd_region, int *err); u64 nd_region_interleave_set_cookie(struct nd_region *nd_region); +u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region); void nvdimm_bus_lock(struct device *dev); void nvdimm_bus_unlock(struct device *dev); bool is_nvdimm_bus_locked(struct device *dev); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 6af5e629140c..9cf6f1a88fce 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -505,6 +505,15 @@ u64 nd_region_interleave_set_cookie(struct nd_region *nd_region) return 0; } +u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region) +{ + struct nd_interleave_set *nd_set = nd_region->nd_set; + + if (nd_set) + return nd_set->altcookie; + return 0; +} + void nd_mapping_free_labels(struct nd_mapping *nd_mapping) { struct nd_label_ent *label_ent, *e; diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 8458c5351e56..77e7af32543f 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -70,6 +70,8 @@ struct nd_cmd_desc { struct nd_interleave_set { u64 cookie; + /* compatibility with initial buggy Linux implementation */ + u64 altcookie; }; struct nd_mapping_desc { From b116db0da1bd97ce405c19b304955d54f650eea9 Mon Sep 17 00:00:00 2001 From: Matt Chen Date: Sun, 22 Jan 2017 02:16:58 +0800 Subject: [PATCH 192/357] mac80211: flush delayed work when entering suspend commit a9e9200d8661c1a0be8c39f93deb383dc940de35 upstream. The issue was found when entering suspend and resume. It triggers a warning in: mac80211/key.c: ieee80211_enable_keys() ... WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt || sdata->crypto_tx_tailroom_pending_dec); ... It points out sdata->crypto_tx_tailroom_pending_dec isn't cleaned up successfully in a delayed_work during suspend. Add a flush_delayed_work to fix it. Signed-off-by: Matt Chen Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/pm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 28a3a0957c9e..76a8bcd8ef11 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -168,6 +168,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) break; } + flush_delayed_work(&sdata->dec_tailroom_needed_wk); drv_remove_interface(local, sdata); } From 42e7f3771429d40b5307f0db0ecdba61e79272ac Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 6 Feb 2017 15:28:42 +0200 Subject: [PATCH 193/357] mac80211: don't reorder frames with SN smaller than SSN commit b7540d8f25c8034de7e4163fc23ac457bf057731 upstream. When RX aggregation starts, transmitter may continue send frames with SN smaller than SSN until the AddBA response is received. However, the reorder buffer is already initialized at this point, which will cause the drop of such frames as duplicates since the head SN of the reorder buffer is set to the SSN, which is bigger. Signed-off-by: Sara Sharon Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/agg-rx.c | 1 + net/mac80211/rx.c | 14 +++++++++++++- net/mac80211/sta_info.h | 6 ++++-- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 3b5fd4188f2a..58ad23a44109 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -398,6 +398,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, tid_agg_rx->timeout = timeout; tid_agg_rx->stored_mpdu_num = 0; tid_agg_rx->auto_seq = auto_seq; + tid_agg_rx->started = false; tid_agg_rx->reorder_buf_filtered = 0; status = WLAN_STATUS_SUCCESS; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 2384b4aae064..8a344bd3b52a 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4,7 +4,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright(c) 2015 - 2016 Intel Deutschland GmbH + * Copyright(c) 2015 - 2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -1034,6 +1034,18 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata buf_size = tid_agg_rx->buf_size; head_seq_num = tid_agg_rx->head_seq_num; + /* + * If the current MPDU's SN is smaller than the SSN, it shouldn't + * be reordered. + */ + if (unlikely(!tid_agg_rx->started)) { + if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) { + ret = false; + goto out; + } + tid_agg_rx->started = true; + } + /* frame with out of date sequence number */ if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) { dev_kfree_skb(skb); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index dd06ef0b8861..15599c70a38f 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -189,6 +189,7 @@ struct tid_ampdu_tx { * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and * and ssn. * @removed: this session is removed (but might have been found due to RCU) + * @started: this session has started (head ssn or higher was received) * * This structure's lifetime is managed by RCU, assignments to * the array holding it must hold the aggregation mutex. @@ -212,8 +213,9 @@ struct tid_ampdu_rx { u16 ssn; u16 buf_size; u16 timeout; - bool auto_seq; - bool removed; + u8 auto_seq:1, + removed:1, + started:1; }; /** From 39813849996f96a764df9b993ba726e995aa1dda Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 22 Feb 2017 16:16:07 +0100 Subject: [PATCH 194/357] mac80211: don't handle filtered frames within a BA session commit 890030d3c425f49abaa4acf60e20f288b599f980 upstream. When running a BA session, the driver (or the hardware) already takes care of retransmitting failed frames, since it has to keep the receiver reorder window in sync. Adding another layer of retransmit around that does not improve anything. In fact, it can only lead to some strong reordering with huge latency. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/status.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index ddf71c648cab..ad37b4e58c2f 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -51,7 +51,8 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, struct ieee80211_hdr *hdr = (void *)skb->data; int ac; - if (info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER) { + if (info->flags & (IEEE80211_TX_CTL_NO_PS_BUFFER | + IEEE80211_TX_CTL_AMPDU)) { ieee80211_free_txskb(&local->hw, skb); return; } From b507df2e236e5edc868ea8ac572567f1de0107f7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 27 Feb 2017 09:38:11 +0100 Subject: [PATCH 195/357] mac80211: use driver-indicated transmitter STA only for data frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 19d19e960598161be92a7e4828eb7706c6410ce6 upstream. When I originally introduced using the driver-indicated station as an optimisation to avoid the hashtable lookup/iteration, of course it wasn't intended to really functionally change anything. I neglected, however, to take into account VLAN interfaces, which have the property that management and data frames are handled differently: data frames go directly to the station and the VLAN while management frames continue to be processed over the underlying/associated AP-type interface. As a consequence, when a driver used this optimisation for management frames and the user enabled VLANs, my change broke things since any management frames, particularly disassoc/deauth, were missed by hostapd. Fix this by restoring the original code path for non-data frames, they aren't critical for performance to begin with. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=194713. Big thanks goes to Jarek who bisected the issue and provided a very detailed bug report, including the crucial information that he was using VLANs in his configuration. Fixes: 771e846bea9e ("mac80211: allow passing transmitter station on RX") Reported-and-tested-by: Jarek Kamiński Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/rx.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 8a344bd3b52a..a697ddf56334 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4092,15 +4092,17 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, ieee80211_is_beacon(hdr->frame_control))) ieee80211_scan_rx(local, skb); - if (pubsta) { - rx.sta = container_of(pubsta, struct sta_info, sta); - rx.sdata = rx.sta->sdata; - if (ieee80211_prepare_and_rx_handle(&rx, skb, true)) - return; - goto out; - } else if (ieee80211_is_data(fc)) { + if (ieee80211_is_data(fc)) { struct sta_info *sta, *prev_sta; + if (pubsta) { + rx.sta = container_of(pubsta, struct sta_info, sta); + rx.sdata = rx.sta->sdata; + if (ieee80211_prepare_and_rx_handle(&rx, skb, true)) + return; + goto out; + } + prev_sta = NULL; for_each_sta_info(local, hdr->addr2, sta, tmp) { From 0734a3213ca83c92e27167624737fd7a2fcbcca5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 10 Feb 2017 00:00:52 -0500 Subject: [PATCH 196/357] drm/amdgpu: add more cases to DCE11 possible crtc mask setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4ce3bd45b351633f2a0512c587f7fcba2ce044e8 upstream. Add cases for asics with 3 and 5 crtcs. Fixes an artificial limitation on asics with 3 or 5 crtcs. Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=99744 Reviewed-by: Michel Dänzer Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 7ddc32127d88..64a1df62cc65 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -3814,9 +3814,15 @@ static void dce_v11_0_encoder_add(struct amdgpu_device *adev, default: encoder->possible_crtcs = 0x3; break; + case 3: + encoder->possible_crtcs = 0x7; + break; case 4: encoder->possible_crtcs = 0xf; break; + case 5: + encoder->possible_crtcs = 0x1f; + break; case 6: encoder->possible_crtcs = 0x3f; break; From 54aa80e300d6e64d569962f25c50bc37436ae9de Mon Sep 17 00:00:00 2001 From: "Y.C. Chen" Date: Wed, 22 Feb 2017 15:10:50 +1100 Subject: [PATCH 197/357] drm/ast: Fix test for VGA enabled commit 905f21a49d388de3e99438235f3301cabf0c0ef4 upstream. The test to see if VGA was already enabled is doing an unnecessary second test from a register that may or may not have been initialized to a valid value. Remove it. Signed-off-by: Y.C. Chen Signed-off-by: Benjamin Herrenschmidt Acked-by: Joel Stanley Tested-by: Y.C. Chen Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ast/ast_post.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c index 810c51d92b99..4e8aaab5dd52 100644 --- a/drivers/gpu/drm/ast/ast_post.c +++ b/drivers/gpu/drm/ast/ast_post.c @@ -58,13 +58,9 @@ bool ast_is_vga_enabled(struct drm_device *dev) /* TODO 1180 */ } else { ch = ast_io_read8(ast, AST_IO_VGA_ENABLE_PORT); - if (ch) { - ast_open_key(ast); - ch = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb6, 0xff); - return ch & 0x04; - } + return !!(ch & 0x01); } - return 0; + return false; } static const u8 extreginfo[] = { 0x0f, 0x04, 0x1c, 0xff }; From 5da03582e2120b04a58aabc7caa61109106e6c3d Mon Sep 17 00:00:00 2001 From: "Y.C. Chen" Date: Wed, 22 Feb 2017 15:14:19 +1100 Subject: [PATCH 198/357] drm/ast: Call open_key before enable_mmio in POST code commit 9bb92f51558f2ef5f56c257bdcea0588f31d857e upstream. open_key enables access the registers used by enable_mmio Signed-off-by: Y.C. Chen Signed-off-by: Benjamin Herrenschmidt Acked-by: Joel Stanley Tested-by: Y.C. Chen Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ast/ast_post.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c index 4e8aaab5dd52..50836e549867 100644 --- a/drivers/gpu/drm/ast/ast_post.c +++ b/drivers/gpu/drm/ast/ast_post.c @@ -371,8 +371,8 @@ void ast_post_gpu(struct drm_device *dev) pci_write_config_dword(ast->dev->pdev, 0x04, reg); ast_enable_vga(dev); - ast_enable_mmio(dev); ast_open_key(ast); + ast_enable_mmio(dev); ast_set_def_ext_reg(dev); if (ast->chip == AST2300 || ast->chip == AST2400) From f96c10c2bd1ba4c438cb5e61700825f6ab376a7e Mon Sep 17 00:00:00 2001 From: "Y.C. Chen" Date: Thu, 23 Feb 2017 15:52:33 +0800 Subject: [PATCH 199/357] drm/ast: Fix AST2400 POST failure without BMC FW or VBIOS commit 3856081eede297b617560b85e948cfb00bb395ec upstream. The current POST code for the AST2300/2400 family doesn't work properly if the chip hasn't been initialized previously by either the BMC own FW or the VBIOS. This fixes it. Signed-off-by: Y.C. Chen Signed-off-by: Benjamin Herrenschmidt Tested-by: Y.C. Chen Acked-by: Joel Stanley Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ast/ast_post.c | 38 +++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c index 50836e549867..30672a3df8a9 100644 --- a/drivers/gpu/drm/ast/ast_post.c +++ b/drivers/gpu/drm/ast/ast_post.c @@ -1626,12 +1626,44 @@ static void ast_init_dram_2300(struct drm_device *dev) temp |= 0x73; ast_write32(ast, 0x12008, temp); + param.dram_freq = 396; param.dram_type = AST_DDR3; + temp = ast_mindwm(ast, 0x1e6e2070); if (temp & 0x01000000) param.dram_type = AST_DDR2; - param.dram_chipid = ast->dram_type; - param.dram_freq = ast->mclk; - param.vram_size = ast->vram_size; + switch (temp & 0x18000000) { + case 0: + param.dram_chipid = AST_DRAM_512Mx16; + break; + default: + case 0x08000000: + param.dram_chipid = AST_DRAM_1Gx16; + break; + case 0x10000000: + param.dram_chipid = AST_DRAM_2Gx16; + break; + case 0x18000000: + param.dram_chipid = AST_DRAM_4Gx16; + break; + } + switch (temp & 0x0c) { + default: + case 0x00: + param.vram_size = AST_VIDMEM_SIZE_8M; + break; + + case 0x04: + param.vram_size = AST_VIDMEM_SIZE_16M; + break; + + case 0x08: + param.vram_size = AST_VIDMEM_SIZE_32M; + break; + + case 0x0c: + param.vram_size = AST_VIDMEM_SIZE_64M; + break; + } if (param.dram_type == AST_DDR3) { get_ddr3_info(ast, ¶m); From ec8e40b117c3064cb826a4f25d77e45a7f87d602 Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Mon, 20 Feb 2017 16:25:45 +0100 Subject: [PATCH 200/357] drm/edid: Add EDID_QUIRK_FORCE_8BPC quirk for Rotel RSX-1058 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 36fc579761b50784b63dafd0f2e796b659e0f5ee upstream. Rotel RSX-1058 is a receiver with 4 HDMI inputs and a HDMI output, all 1.1. When a sink that supports deep color is connected to the output, the receiver will send EDIDs that advertise this capability, even if it isn't possible with HDMI versions earlier than 1.3. Currently the kernel is assuming that deep color is possible and the sink displays an error. This quirk will make sure that deep color isn't used with this particular receiver. Fixes: 7a0baa623446 ("Revert "drm/i915: Disable 12bpc hdmi for now"") Signed-off-by: Tomeu Vizoso Link: http://patchwork.freedesktop.org/patch/msgid/20170220152545.13153-1-tomeu.vizoso@collabora.com Cc: Matt Horan Tested-by: Matt Horan Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99869 Reviewed-by: Ville Syrjälä Signed-off-by: Ville Syrjälä Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_edid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index ec77bd3e1f08..7491180698d1 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -145,6 +145,9 @@ static struct edid_quirk { /* Panel in Samsung NP700G7A-S01PL notebook reports 6bpc */ { "SEC", 0xd033, EDID_QUIRK_FORCE_8BPC }, + + /* Rotel RSX-1058 forwards sink's EDID but only does HDMI 1.1*/ + { "ETR", 13896, EDID_QUIRK_FORCE_8BPC }, }; /* From 55f47122c2e8d77a2bdb0f4bd7da57423f947824 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Wed, 25 Jan 2017 17:21:31 +0900 Subject: [PATCH 201/357] drm/ttm: Make sure BOs being swapped out are cacheable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 239ac65fa5ffab71adf66e642750f940e7241d99 upstream. The current caching state may not be tt_cached, even though the placement contains TTM_PL_FLAG_CACHED, because placement can contain multiple caching flags. Trying to swap out such a BO would trip up the BUG_ON(ttm->caching_state != tt_cached); in ttm_tt_swapout. Signed-off-by: Michel Dänzer Reviewed-by: Thomas Hellstrom Reviewed-by: Christian König . Reviewed-by: Sinclair Yeh Signed-off-by: Christian König Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ttm/ttm_bo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index fc6217dfe401..35cc16f9fec9 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1654,7 +1654,6 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) struct ttm_buffer_object *bo; int ret = -EBUSY; int put_count; - uint32_t swap_placement = (TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM); spin_lock(&glob->lru_lock); list_for_each_entry(bo, &glob->swap_lru, swap) { @@ -1685,7 +1684,8 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) * Move to system cached */ - if ((bo->mem.placement & swap_placement) != swap_placement) { + if (bo->mem.mem_type != TTM_PL_SYSTEM || + bo->ttm->caching_state != tt_cached) { struct ttm_mem_reg evict_mem; evict_mem = bo->mem; From 56a567de553e0ad1440b111077dce521ecdd0cd8 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 21 Feb 2017 17:42:27 +0700 Subject: [PATCH 202/357] drm/vmwgfx: Work around drm removal of control nodes commit 31788ca803a0c89078f9e604e64286fbd9077926 upstream. vmware tools has a daemon that gets layout information from the GUI and forwards it to DRM so that the modesetting code can set preferred connector locations and modes. This daemon was using control nodes but since control nodes were just removed, make it possible for the daemon to use render- or primary nodes instead. This is a bit ugly but will allow drm to proceed with removal of the mostly unused control-node code and allow vmware to proceed with fixing up automatic layout settings for gnome-shell/wayland. We bump minor to inform user-space about the api change. Signed-off-by: Thomas Hellstrom Reviewed-by: Sinclair Yeh Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170221104227.2854-1-thellstrom@vmware.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 11 ++++++++++- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 4 ++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 18061a4bc2f2..36005bdf3749 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -199,9 +199,14 @@ static const struct drm_ioctl_desc vmw_ioctls[] = { VMW_IOCTL_DEF(VMW_PRESENT_READBACK, vmw_present_readback_ioctl, DRM_MASTER | DRM_AUTH), + /* + * The permissions of the below ioctl are overridden in + * vmw_generic_ioctl(). We require either + * DRM_MASTER or capable(CAP_SYS_ADMIN). + */ VMW_IOCTL_DEF(VMW_UPDATE_LAYOUT, vmw_kms_update_layout_ioctl, - DRM_MASTER | DRM_CONTROL_ALLOW), + DRM_RENDER_ALLOW), VMW_IOCTL_DEF(VMW_CREATE_SHADER, vmw_shader_define_ioctl, DRM_AUTH | DRM_RENDER_ALLOW), @@ -1125,6 +1130,10 @@ static long vmw_generic_ioctl(struct file *filp, unsigned int cmd, return (long) vmw_execbuf_ioctl(dev, arg, file_priv, _IOC_SIZE(cmd)); + } else if (nr == DRM_COMMAND_BASE + DRM_VMW_UPDATE_LAYOUT) { + if (!drm_is_current_master(file_priv) && + !capable(CAP_SYS_ADMIN)) + return -EACCES; } if (unlikely(ioctl->cmd != cmd)) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 1e59a486bba8..59ff4197173a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -41,9 +41,9 @@ #include #include "vmwgfx_fence.h" -#define VMWGFX_DRIVER_DATE "20160210" +#define VMWGFX_DRIVER_DATE "20170221" #define VMWGFX_DRIVER_MAJOR 2 -#define VMWGFX_DRIVER_MINOR 11 +#define VMWGFX_DRIVER_MINOR 12 #define VMWGFX_DRIVER_PATCHLEVEL 0 #define VMWGFX_FILE_PAGE_OFFSET 0x00100000 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024) From 3a654a85932fa7a909b11934df7657c1a2cb3609 Mon Sep 17 00:00:00 2001 From: Nandor Han Date: Tue, 11 Oct 2016 14:13:41 +0300 Subject: [PATCH 203/357] dmaengine: imx-sdma - correct the dma transfer residue calculation commit 85f57752b33cf12f1d583f0c10b752292de00abe upstream. The residue calculation was taking in consideration that dma transaction status will be always retrieved in the dma callback used to inform that dma transfer is complete. However this is not the case for all subsystems that use dma. Some subsystems use a timer to check the dma status periodically. Therefore the calculation was updated and residue is calculated accordingly by a) update the residue calculation taking in consideration the last used buffer index by using *buf_ptail* variable and b) chn_real_count (number of bytes transferred) is initialized to zero, when dma channel is created, to avoid using an uninitialized value in residue calculation when dma status is checked without waiting dma complete event. Signed-off-by: Nandor Han Acked-by: Peter Senna Tschudin Tested-by: Peter Senna Tschudin Tested-by: Marek Vasut Signed-off-by: Vinod Koul Cc: Fabio Estevam Signed-off-by: Greg Kroah-Hartman --- drivers/dma/imx-sdma.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index b9629b2bfc05..d1651a50c349 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -298,6 +298,7 @@ struct sdma_engine; * @event_id1 for channels that use 2 events * @word_size peripheral access size * @buf_tail ID of the buffer that was processed + * @buf_ptail ID of the previous buffer that was processed * @num_bd max NUM_BD. number of descriptors currently handling */ struct sdma_channel { @@ -309,6 +310,7 @@ struct sdma_channel { unsigned int event_id1; enum dma_slave_buswidth word_size; unsigned int buf_tail; + unsigned int buf_ptail; unsigned int num_bd; unsigned int period_len; struct sdma_buffer_descriptor *bd; @@ -700,6 +702,8 @@ static void sdma_update_channel_loop(struct sdma_channel *sdmac) sdmac->chn_real_count = bd->mode.count; bd->mode.status |= BD_DONE; bd->mode.count = sdmac->period_len; + sdmac->buf_ptail = sdmac->buf_tail; + sdmac->buf_tail = (sdmac->buf_tail + 1) % sdmac->num_bd; /* * The callback is called from the interrupt context in order @@ -710,9 +714,6 @@ static void sdma_update_channel_loop(struct sdma_channel *sdmac) dmaengine_desc_get_callback_invoke(&sdmac->desc, NULL); - sdmac->buf_tail++; - sdmac->buf_tail %= sdmac->num_bd; - if (error) sdmac->status = old_status; } @@ -1186,6 +1187,8 @@ static struct dma_async_tx_descriptor *sdma_prep_slave_sg( sdmac->flags = 0; sdmac->buf_tail = 0; + sdmac->buf_ptail = 0; + sdmac->chn_real_count = 0; dev_dbg(sdma->dev, "setting up %d entries for channel %d.\n", sg_len, channel); @@ -1288,6 +1291,8 @@ static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic( sdmac->status = DMA_IN_PROGRESS; sdmac->buf_tail = 0; + sdmac->buf_ptail = 0; + sdmac->chn_real_count = 0; sdmac->period_len = period_len; sdmac->flags |= IMX_DMA_SG_LOOP; @@ -1385,7 +1390,7 @@ static enum dma_status sdma_tx_status(struct dma_chan *chan, u32 residue; if (sdmac->flags & IMX_DMA_SG_LOOP) - residue = (sdmac->num_bd - sdmac->buf_tail) * + residue = (sdmac->num_bd - sdmac->buf_ptail) * sdmac->period_len - sdmac->chn_real_count; else residue = sdmac->chn_count - sdmac->chn_real_count; From 98620b564fadc0f997194b3da0393b78b2c17447 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 8 Feb 2017 10:47:49 -0200 Subject: [PATCH 204/357] drm/imx: imx-tve: Do not set the regulator voltage commit fc12bccda8b6f5c38139eceec9e369ed78091b2b upstream. Commit deb65870b5d9d ("drm/imx: imx-tve: check the value returned by regulator_set_voltage()") exposes the following probe issue: 63ff0000.tve supply dac not found, using dummy regulator imx-drm display-subsystem: failed to bind 63ff0000.tve (ops imx_tve_ops): -22 When the 'dac-supply' is not passed in the device tree a dummy regulator is used and setting its voltage is not allowed. To fix this issue, do not set the dac-supply voltage inside the driver and let its voltage be specified in the device tree. Print a warning if the the 'dac-supply' voltage has a value different from 2.75V. Fixes: deb65870b5d9d ("drm/imx: imx-tve: check the value returned by regulator_set_voltage()") Suggested-by: Lucas Stach Signed-off-by: Fabio Estevam Signed-off-by: Philipp Zabel Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/imx/imx-tve.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/imx/imx-tve.c b/drivers/gpu/drm/imx/imx-tve.c index 8fc088843e55..89cf0090feac 100644 --- a/drivers/gpu/drm/imx/imx-tve.c +++ b/drivers/gpu/drm/imx/imx-tve.c @@ -98,6 +98,8 @@ /* TVE_TST_MODE_REG */ #define TVE_TVDAC_TEST_MODE_MASK (0x7 << 0) +#define IMX_TVE_DAC_VOLTAGE 2750000 + enum { TVE_MODE_TVOUT, TVE_MODE_VGA, @@ -628,9 +630,8 @@ static int imx_tve_bind(struct device *dev, struct device *master, void *data) tve->dac_reg = devm_regulator_get(dev, "dac"); if (!IS_ERR(tve->dac_reg)) { - ret = regulator_set_voltage(tve->dac_reg, 2750000, 2750000); - if (ret) - return ret; + if (regulator_get_voltage(tve->dac_reg) != IMX_TVE_DAC_VOLTAGE) + dev_warn(dev, "dac voltage is not %d uV\n", IMX_TVE_DAC_VOLTAGE); ret = regulator_enable(tve->dac_reg); if (ret) return ret; From f72b751f434062deaf16a1aca2c5f53ddf71ed8f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 8 Feb 2017 02:46:01 +0300 Subject: [PATCH 205/357] drm/atomic: fix an error code in mode_fixup() commit f9ad86e42d0303eeb8e0d41bb208153022ebd9d2 upstream. Having "ret" be a bool type works for everything except ret = funcs->atomic_check(). The other functions all return zero on error but ->atomic_check() returns negative error codes. We want to propagate the error code but instead we return 1. I found this bug with static analysis and I don't know if it affects run time. Fixes: 4cd4df8080a3 ("drm/atomic: Add ->atomic_check() to encoder helpers") Signed-off-by: Dan Carpenter Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170207234601.GA23981@mwanda Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_atomic_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index a05bb3891119..2e42a0584a84 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -362,7 +362,7 @@ mode_fixup(struct drm_atomic_state *state) struct drm_connector *connector; struct drm_connector_state *conn_state; int i; - bool ret; + int ret; for_each_crtc_in_state(state, crtc, crtc_state, i) { if (!crtc_state->mode_changed && From 868a747c77cbf1d5e183dbceb912baa4db2be913 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 9 Nov 2016 10:39:05 +0000 Subject: [PATCH 206/357] drm/i915/gvt: Disable access to stolen memory as a guest commit ddd09373628adcbdc3f7b9098d22328834f8d772 upstream. Explicitly disable stolen memory when running as a guest in a virtual machine, since the memory is not mediated between clients and reserved entirely for the host. The actual size should be reported as zero, but like every other quirk we want to tell the user what is happening. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99028 Signed-off-by: Chris Wilson Cc: Zhenyu Wang Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161109103905.17860-1-chris@chris-wilson.co.uk Reviewed-by: Zhenyu Wang (cherry picked from commit 04a68a35ce6d7b54749989f943993020f48fed62) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_stolen.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 9a71ed546b90..f46aac1e85fb 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -415,6 +415,11 @@ int i915_gem_init_stolen(struct drm_device *dev) mutex_init(&dev_priv->mm.stolen_lock); + if (intel_vgpu_active(dev_priv)) { + DRM_INFO("iGVT-g active, disabling use of stolen memory\n"); + return 0; + } + #ifdef CONFIG_INTEL_IOMMU if (intel_iommu_gfx_mapped && INTEL_INFO(dev)->gen < 8) { DRM_INFO("DMAR active, disabling use of stolen memory\n"); From 9edc456fe621f3ba33859ea3ad3c285e544db9ad Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Feb 2017 12:49:55 +0000 Subject: [PATCH 207/357] drm: Cancel drm_fb_helper_dirty_work on unload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f21b9a92ca7c29382909eaab9facc2cf46f2cc0b upstream. We can not allow the worker to run after its fbdev, or even the module, has been removed. Fixes: eaa434defaca ("drm/fb-helper: Add fb_deferred_io support") Signed-off-by: Chris Wilson Cc: Noralf Trønnes Cc: Daniel Vetter Cc: Jani Nikula Cc: Sean Paul Cc: dri-devel@lists.freedesktop.org Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170207124956.14954-1-chris@chris-wilson.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_fb_helper.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 6c75e62c0b22..cc8938bb9ff4 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -848,6 +848,8 @@ void drm_fb_helper_fini(struct drm_fb_helper *fb_helper) if (!drm_fbdev_emulation) return; + cancel_work_sync(&fb_helper->dirty_work); + if (!list_empty(&fb_helper->kernel_fb_list)) { list_del(&fb_helper->kernel_fb_list); if (list_empty(&kernel_fb_helper_list)) { From 967e17bcc96da9ab48fd621a6859d1ca55e82df9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Feb 2017 12:49:56 +0000 Subject: [PATCH 208/357] drm: Cancel drm_fb_helper_resume_work on unload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 24f76b2c87ed68f79f9f0705b11ccbefaaa0d390 upstream. We can not allow the worker to run after its fbdev, or even the module, has been removed. Fixes: cfe63423d9be ("drm/fb-helper: Add drm_fb_helper_set_suspend_unlocked()") Signed-off-by: Chris Wilson Cc: Noralf Trønnes Cc: Daniel Vetter Cc: Jani Nikula Cc: Sean Paul Cc: dri-devel@lists.freedesktop.org Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170207124956.14954-2-chris@chris-wilson.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_fb_helper.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index cc8938bb9ff4..6a48d6637e5c 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -848,6 +848,7 @@ void drm_fb_helper_fini(struct drm_fb_helper *fb_helper) if (!drm_fbdev_emulation) return; + cancel_work_sync(&fb_helper->resume_work); cancel_work_sync(&fb_helper->dirty_work); if (!list_empty(&fb_helper->kernel_fb_list)) { From 686ea5862eb61e070881c221271f6563cb74a7f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 8 Feb 2017 19:52:54 +0200 Subject: [PATCH 209/357] drm/i915: Avoid spurious WARNs about the wrong pipe in the PPS code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit aa9323dd49b23932a09023012f050556de64f118 upstream. Until recently vlv_steal_power_sequencer() wasn't being called for normal DP ports, and hence it could assert that it should only be called for pipe A and B (since pipe C doesn't support eDP). However that changed when we started to consider normal DP ports as well when choosing a PPS. So we will now get spurious warnings when vlv_steal_power_sequencer() does get called for pipe C. Avoid this by moving the WARN down into vlv_detach_power_sequencer() where this assertion should still hold. Cc: Imre Deak Fixes: 9f2bdb006a7e ("drm/i915: Prevent PPS stealing from a normal DP port on VLV/CHV") References: https://bugs.freedesktop.org/show_bug.cgi?id=95287 Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170208175254.10958-1-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak (cherry picked from commit d158694f452252d0fef335a775aeb3eb74fe7af0) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_dp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 055525013d2f..7b06280b23aa 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2832,6 +2832,9 @@ static void vlv_detach_power_sequencer(struct intel_dp *intel_dp) enum pipe pipe = intel_dp->pps_pipe; i915_reg_t pp_on_reg = PP_ON_DELAYS(pipe); + if (WARN_ON(pipe != PIPE_A && pipe != PIPE_B)) + return; + edp_panel_vdd_off_sync(intel_dp); /* @@ -2859,9 +2862,6 @@ static void vlv_steal_power_sequencer(struct drm_device *dev, lockdep_assert_held(&dev_priv->pps_mutex); - if (WARN_ON(pipe != PIPE_A && pipe != PIPE_B)) - return; - for_each_intel_encoder(dev, encoder) { struct intel_dp *intel_dp; enum port port; From ff3bcdc9b5f00809a18cc1a3ea42a72e00b1a895 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 14 Feb 2017 18:12:38 +0200 Subject: [PATCH 210/357] drm/i915: Fix not finding the VBT when it overlaps with OPREGION_ASLE_EXT commit 998d75730b40afc218c059d811869abe9676b305 upstream. If there is no OPREGION_ASLE_EXT then a VBT stored in mailbox #4 may use the ASLE_EXT parts of the opregion. Adjust the vbt_size calculation for a vbt in mailbox #4 for this. This fixes the driver not finding the VBT on a jumper ezpad mini3 cherrytrail tablet and on a ACER SW5_017 machine. Signed-off-by: Hans de Goede Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1487088758-30050-1-git-send-email-jani.nikula@intel.com (cherry picked from commit dfb65e71ea2c1d97ac373cc0587dc60b3307581a) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_opregion.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index 7acbbbf97833..4534e4cadccf 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -1031,7 +1031,18 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) opregion->vbt_size = vbt_size; } else { vbt = base + OPREGION_VBT_OFFSET; - vbt_size = OPREGION_ASLE_EXT_OFFSET - OPREGION_VBT_OFFSET; + /* + * The VBT specification says that if the ASLE ext + * mailbox is not used its area is reserved, but + * on some CHT boards the VBT extends into the + * ASLE ext area. Allow this even though it is + * against the spec, so we do not end up rejecting + * the VBT on those boards (and end up not finding the + * LCD panel because of this). + */ + vbt_size = (mboxes & MBOX_ASLE_EXT) ? + OPREGION_ASLE_EXT_OFFSET : OPREGION_SIZE; + vbt_size -= OPREGION_VBT_OFFSET; if (intel_bios_is_valid_vbt(vbt, vbt_size)) { DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (Mailbox #4)\n"); opregion->vbt = vbt; From 290215a2abfd9c0ede35dfc753439ef61cfd679b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Jan 2017 12:06:09 +0100 Subject: [PATCH 211/357] libceph: use BUG() instead of BUG_ON(1) commit d24cdcd3e40a6825135498e11c20c7976b9bf545 upstream. I ran into this compile warning, which is the result of BUG_ON(1) not always leading to the compiler treating the code path as unreachable: include/linux/ceph/osdmap.h: In function 'ceph_can_shift_osds': include/linux/ceph/osdmap.h:62:1: error: control reaches end of non-void function [-Werror=return-type] Using BUG() here avoids the warning. Signed-off-by: Arnd Bergmann Signed-off-by: Ilya Dryomov Cc: Heinrich Schuchardt Signed-off-by: Greg Kroah-Hartman --- include/linux/ceph/osdmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 9a9041784dcf..412906609954 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -57,7 +57,7 @@ static inline bool ceph_can_shift_osds(struct ceph_pg_pool_info *pool) case CEPH_POOL_TYPE_EC: return false; default: - BUG_ON(1); + BUG(); } } From 91cdd9d79616ccea16e2e26fe329a15d226feb0b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 9 Mar 2017 16:16:42 -0800 Subject: [PATCH 212/357] x86, mm: fix gup_pte_range() vs DAX mappings commit ef947b2529f918d9606533eb9c32b187ed6a5ede upstream. gup_pte_range() fails to check pte_allows_gup() before translating a DAX pte entry, pte_devmap(), to a page. This allows writes to read-only mappings, and bypasses the DAX cacheline dirty tracking due to missed 'mkwrite' faults. The gup_huge_pmd() path and the gup_huge_pud() path correctly check pte_allows_gup() before checking for _devmap() entries. Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings") Link: http://lkml.kernel.org/r/148804251312.36605.12665024794196605053.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ross Zwisler Signed-off-by: Dan Williams Reported-by: Dave Hansen Reported-by: Ross Zwisler Cc: Xiong Zhou Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/gup.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 0d4fb3ebbbac..1680768d392c 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -120,6 +120,11 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, return 0; } + if (!pte_allows_gup(pte_val(pte), write)) { + pte_unmap(ptep); + return 0; + } + if (pte_devmap(pte)) { pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); if (unlikely(!pgmap)) { @@ -127,8 +132,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, pte_unmap(ptep); return 0; } - } else if (!pte_allows_gup(pte_val(pte), write) || - pte_special(pte)) { + } else if (pte_special(pte)) { pte_unmap(ptep); return 0; } From 2f18b39499b23579a60e8ed1d2c0b56154388323 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 11 Mar 2017 01:31:19 +0100 Subject: [PATCH 213/357] x86/tlb: Fix tlb flushing when lguest clears PGE commit 2c4ea6e28dbf15ab93632c5c189f3948366b8885 upstream. Fengguang reported random corruptions from various locations on x86-32 after commits d2852a224050 ("arch: add ARCH_HAS_SET_MEMORY config") and 9d876e79df6a ("bpf: fix unlocking of jited image when module ronx not set") that uses the former. While x86-32 doesn't have a JIT like x86_64, the bpf_prog_lock_ro() and bpf_prog_unlock_ro() got enabled due to ARCH_HAS_SET_MEMORY, whereas Fengguang's test kernel doesn't have module support built in and therefore never had the DEBUG_SET_MODULE_RONX setting enabled. After investigating the crashes further, it turned out that using set_memory_ro() and set_memory_rw() didn't have the desired effect, for example, setting the pages as read-only on x86-32 would still let probe_kernel_write() succeed without error. This behavior would manifest itself in situations where the vmalloc'ed buffer was accessed prior to set_memory_*() such as in case of bpf_prog_alloc(). In cases where it wasn't, the page attribute changes seemed to have taken effect, leading to the conclusion that a TLB invalidate didn't happen. Moreover, it turned out that this issue reproduced with qemu in "-cpu kvm64" mode, but not for "-cpu host". When the issue occurs, change_page_attr_set_clr() did trigger a TLB flush as expected via __flush_tlb_all() through cpa_flush_range(), though. There are 3 variants for issuing a TLB flush: invpcid_flush_all() (depends on CPU feature bits X86_FEATURE_INVPCID, X86_FEATURE_PGE), cr4 based flush (depends on X86_FEATURE_PGE), and cr3 based flush. For "-cpu host" case in my setup, the flush used invpcid_flush_all() variant, whereas for "-cpu kvm64", the flush was cr4 based. Switching the kvm64 case to cr3 manually worked fine, and further investigating the cr4 one turned out that X86_CR4_PGE bit was not set in cr4 register, meaning the __native_flush_tlb_global_irq_disabled() wrote cr4 twice with the same value instead of clearing X86_CR4_PGE in the first write to trigger the flush. It turned out that X86_CR4_PGE was cleared from cr4 during init from lguest_arch_host_init() via adjust_pge(). The X86_FEATURE_PGE bit is also cleared from there due to concerns of using PGE in guest kernel that can lead to hard to trace bugs (see bff672e630a0 ("lguest: documentation V: Host") in init()). The CPU feature bits are cleared in dynamic boot_cpu_data, but they never propagated to __flush_tlb_all() as it uses static_cpu_has() instead of boot_cpu_has() for testing which variant of TLB flushing to use, meaning they still used the old setting of the host kernel. Clearing via setup_clear_cpu_cap(X86_FEATURE_PGE) so this would propagate to static_cpu_has() checks is too late at this point as sections have been patched already, so for now, it seems reasonable to switch back to boot_cpu_has(X86_FEATURE_PGE) as it was prior to commit c109bf95992b ("x86/cpufeature: Remove cpu_has_pge"). This lets the TLB flush trigger via cr3 as originally intended, properly makes the new page attributes visible and thus fixes the crashes seen by Fengguang. Fixes: c109bf95992b ("x86/cpufeature: Remove cpu_has_pge") Reported-by: Fengguang Wu Signed-off-by: Daniel Borkmann Cc: bp@suse.de Cc: Kees Cook Cc: "David S. Miller" Cc: netdev@vger.kernel.org Cc: Rusty Russell Cc: Alexei Starovoitov Cc: Linus Torvalds Cc: lkp@01.org Cc: Laura Abbott Link: http://lkml.kernrl.org/r/20170301125426.l4nf65rx4wahohyl@wfg-t540p.sh.intel.com Link: http://lkml.kernel.org/r/25c41ad9eca164be4db9ad84f768965b7eb19d9e.1489191673.git.daniel@iogearbox.net Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/tlbflush.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 6fa85944af83..fc5abff9b7fd 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -188,7 +188,7 @@ static inline void __native_flush_tlb_single(unsigned long addr) static inline void __flush_tlb_all(void) { - if (static_cpu_has(X86_FEATURE_PGE)) + if (boot_cpu_has(X86_FEATURE_PGE)) __flush_tlb_global(); else __flush_tlb(); From 1771fc58a35d6c61861c08a190ecf13cd70a38eb Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 9 Mar 2017 16:17:23 -0800 Subject: [PATCH 214/357] thp: fix another corner case of munlock() vs. THPs commit 6ebb4a1b848fe75323135f93e72c78f8780fd268 upstream. The following test case triggers BUG() in munlock_vma_pages_range(): int main(int argc, char *argv[]) { int fd; system("mount -t tmpfs -o huge=always none /mnt"); fd = open("/mnt/test", O_CREAT | O_RDWR); ftruncate(fd, 4UL << 20); mmap(NULL, 4UL << 20, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED | MAP_LOCKED, fd, 0); mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, fd, 0); munlockall(); return 0; } The second mmap() create PTE-mapping of the first huge page in file. It makes kernel munlock the page as we never keep PTE-mapped page mlocked. On munlockall() when we handle vma created by the first mmap(), munlock_vma_page() returns page_mask == 0, as the page is not mlocked anymore. On next iteration follow_page_mask() return tail page, but page_mask is HPAGE_NR_PAGES - 1. It makes us skip to the first tail page of the next huge page and step on VM_BUG_ON_PAGE(PageMlocked(page)). The fix is not use the page_mask from follow_page_mask() at all. It has no use for us. Link: http://lkml.kernel.org/r/20170302150252.34120-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mlock.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/mm/mlock.c b/mm/mlock.c index cdbed8aaa426..665ab75b5533 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -441,7 +441,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, while (start < end) { struct page *page; - unsigned int page_mask; + unsigned int page_mask = 0; unsigned long page_increm; struct pagevec pvec; struct zone *zone; @@ -455,8 +455,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, * suits munlock very well (and if somehow an abnormal page * has sneaked into the range, we won't oops here: great). */ - page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP, - &page_mask); + page = follow_page(vma, start, FOLL_GET | FOLL_DUMP); if (page && !IS_ERR(page)) { if (PageTransTail(page)) { @@ -467,8 +466,8 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, /* * Any THP page found by follow_page_mask() may * have gotten split before reaching - * munlock_vma_page(), so we need to recompute - * the page_mask here. + * munlock_vma_page(), so we need to compute + * the page_mask here instead. */ page_mask = munlock_vma_page(page); unlock_page(page); From 16ace91043bf5fa192bb6e42451524b3c6ad7bd7 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Thu, 9 Mar 2017 16:17:26 -0800 Subject: [PATCH 215/357] mm: do not call mem_cgroup_free() from within mem_cgroup_alloc() commit 40e952f9d687928b32db20226f085ae660a7237c upstream. mem_cgroup_free() indirectly calls wb_domain_exit() which is not prepared to deal with a struct wb_domain object that hasn't executed wb_domain_init(). For instance, the following warning message is printed by lockdep if alloc_percpu() fails in mem_cgroup_alloc(): INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 1 PID: 1950 Comm: mkdir Not tainted 4.10.0+ #151 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: dump_stack+0x67/0x99 register_lock_class+0x36d/0x540 __lock_acquire+0x7f/0x1a30 lock_acquire+0xcc/0x200 del_timer_sync+0x3c/0xc0 wb_domain_exit+0x14/0x20 mem_cgroup_free+0x14/0x40 mem_cgroup_css_alloc+0x3f9/0x620 cgroup_apply_control_enable+0x190/0x390 cgroup_mkdir+0x290/0x3d0 kernfs_iop_mkdir+0x58/0x80 vfs_mkdir+0x10e/0x1a0 SyS_mkdirat+0xa8/0xd0 SyS_mkdir+0x14/0x20 entry_SYSCALL_64_fastpath+0x18/0xad Add __mem_cgroup_free() which skips wb_domain_exit(). This is used by both mem_cgroup_free() and mem_cgroup_alloc() clean up. Fixes: 0b8f73e104285 ("mm: memcontrol: clean up alloc, online, offline, free functions") Link: http://lkml.kernel.org/r/20170306192122.24262-1-tahsin@google.com Signed-off-by: Tahsin Erdogan Acked-by: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 4c6ade54d833..0de26691f0f5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4139,17 +4139,22 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) kfree(memcg->nodeinfo[node]); } -static void mem_cgroup_free(struct mem_cgroup *memcg) +static void __mem_cgroup_free(struct mem_cgroup *memcg) { int node; - memcg_wb_domain_exit(memcg); for_each_node(node) free_mem_cgroup_per_node_info(memcg, node); free_percpu(memcg->stat); kfree(memcg); } +static void mem_cgroup_free(struct mem_cgroup *memcg) +{ + memcg_wb_domain_exit(memcg); + __mem_cgroup_free(memcg); +} + static struct mem_cgroup *mem_cgroup_alloc(void) { struct mem_cgroup *memcg; @@ -4200,7 +4205,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void) fail: if (memcg->id.id > 0) idr_remove(&mem_cgroup_idr, memcg->id.id); - mem_cgroup_free(memcg); + __mem_cgroup_free(memcg); return NULL; } From 63e873679ba99649bf22cc7b964315591cedf4c5 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 9 Mar 2017 16:17:37 -0800 Subject: [PATCH 216/357] fat: fix using uninitialized fields of fat_inode/fsinfo_inode commit c0d0e351285161a515396b7b1ee53ec9ffd97e3c upstream. Recently fallocate patch was merged and it uses MSDOS_I(inode)->mmu_private at fat_evict_inode(). However, fat_inode/fsinfo_inode that was introduced in past didn't initialize MSDOS_I(inode) properly. With those combinations, it became the cause of accessing random entry in FAT area. Link: http://lkml.kernel.org/r/87pohrj4i8.fsf@mail.parknet.co.jp Signed-off-by: OGAWA Hirofumi Reported-by: Moreno Bartalucci Tested-by: Moreno Bartalucci Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/fat/inode.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 338d2f73eb29..a2c05f2ada6d 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1359,6 +1359,16 @@ out: return 0; } +static void fat_dummy_inode_init(struct inode *inode) +{ + /* Initialize this dummy inode to work as no-op. */ + MSDOS_I(inode)->mmu_private = 0; + MSDOS_I(inode)->i_start = 0; + MSDOS_I(inode)->i_logstart = 0; + MSDOS_I(inode)->i_attrs = 0; + MSDOS_I(inode)->i_pos = 0; +} + static int fat_read_root(struct inode *inode) { struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); @@ -1803,12 +1813,13 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, fat_inode = new_inode(sb); if (!fat_inode) goto out_fail; - MSDOS_I(fat_inode)->i_pos = 0; + fat_dummy_inode_init(fat_inode); sbi->fat_inode = fat_inode; fsinfo_inode = new_inode(sb); if (!fsinfo_inode) goto out_fail; + fat_dummy_inode_init(fsinfo_inode); fsinfo_inode->i_ino = MSDOS_FSINFO_INO; sbi->fsinfo_inode = fsinfo_inode; insert_inode_hash(fsinfo_inode); From 182ff0ebbdab926578728784299cc9d9c195d515 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 8 Feb 2017 18:30:56 -0700 Subject: [PATCH 217/357] drivers: hv: Turn off write permission on the hypercall page commit 372b1e91343e657a7cc5e2e2bcecd5140ac28119 upstream. The hypercall page only needs to be executable but currently it is setup to be writable as well. Fix the issue. Signed-off-by: K. Y. Srinivasan Acked-by: Kees Cook Reported-by: Stephen Hemminger Tested-by: Stephen Hemminger Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 6e49a4dd99c0..e0a8216ecf2b 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -220,7 +220,7 @@ int hv_init(void) /* See if the hypercall page is already set */ rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); - virtaddr = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_EXEC); + virtaddr = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX); if (!virtaddr) goto cleanup; From d962bf8dd954db6db513b42cc484a9528df3e5a3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 15 Mar 2017 10:03:13 +0800 Subject: [PATCH 218/357] Linux 4.9.15 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5e7706e94622..03df4fcacdf2 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 14 +SUBLEVEL = 15 EXTRAVERSION = NAME = Roaring Lionus From e1533c46151cedb8904f3f88d6c2fc0753e46c0f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 31 Jan 2017 17:17:27 +0100 Subject: [PATCH 219/357] USB: serial: digi_acceleport: fix OOB data sanity check commit 2d380889215fe20b8523345649dee0579821800c upstream. Make sure to check for short transfers to avoid underflow in a loop condition when parsing the receive buffer. Also fix an off-by-one error in the incomplete sanity check which could lead to invalid data being parsed. Fixes: 8c209e6782ca ("USB: make actual_length in struct urb field u32") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/digi_acceleport.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index 6a1df9e824ca..3b610f1e3f7c 100644 --- a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -1482,16 +1482,20 @@ static int digi_read_oob_callback(struct urb *urb) struct usb_serial *serial = port->serial; struct tty_struct *tty; struct digi_port *priv = usb_get_serial_port_data(port); + unsigned char *buf = urb->transfer_buffer; int opcode, line, status, val; int i; unsigned int rts; + if (urb->actual_length < 4) + return -1; + /* handle each oob command */ - for (i = 0; i < urb->actual_length - 3;) { - opcode = ((unsigned char *)urb->transfer_buffer)[i++]; - line = ((unsigned char *)urb->transfer_buffer)[i++]; - status = ((unsigned char *)urb->transfer_buffer)[i++]; - val = ((unsigned char *)urb->transfer_buffer)[i++]; + for (i = 0; i < urb->actual_length - 4; i += 4) { + opcode = buf[i]; + line = buf[i + 1]; + status = buf[i + 2]; + val = buf[i + 3]; dev_dbg(&port->dev, "digi_read_oob_callback: opcode=%d, line=%d, status=%d, val=%d\n", opcode, line, status, val); From ce4d67cb3e6ee305437a3034de239f1dd4441d44 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 24 Feb 2017 19:11:28 +0100 Subject: [PATCH 220/357] USB: serial: digi_acceleport: fix OOB-event processing commit 2e46565cf622dd0534a9d8bffe152a577b48d7aa upstream. A recent change claimed to fix an off-by-one error in the OOB-port completion handler, but instead introduced such an error. This could specifically led to modem-status changes going unnoticed, effectively breaking TIOCMGET. Note that the offending commit fixes a loop-condition underflow and is marked for stable, but should not be backported without this fix. Reported-by: Ben Hutchings Fixes: 2d380889215f ("USB: serial: digi_acceleport: fix OOB data sanity check") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/digi_acceleport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index 3b610f1e3f7c..30bf0f5db82d 100644 --- a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -1491,7 +1491,7 @@ static int digi_read_oob_callback(struct urb *urb) return -1; /* handle each oob command */ - for (i = 0; i < urb->actual_length - 4; i += 4) { + for (i = 0; i < urb->actual_length - 3; i += 4) { opcode = buf[i]; line = buf[i + 1]; status = buf[i + 2]; From 2c1820ea8dcc8105cf66d13c69f376de3d95f54f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 3 Feb 2017 23:33:23 +0100 Subject: [PATCH 221/357] crypto: improve gcc optimization flags for serpent and wp512 commit 7d6e9105026788c497f0ab32fa16c82f4ab5ff61 upstream. An ancient gcc bug (first reported in 2003) has apparently resurfaced on MIPS, where kernelci.org reports an overly large stack frame in the whirlpool hash algorithm: crypto/wp512.c:987:1: warning: the frame size of 1112 bytes is larger than 1024 bytes [-Wframe-larger-than=] With some testing in different configurations, I'm seeing large variations in stack frames size up to 1500 bytes for what should have around 300 bytes at most. I also checked the reference implementation, which is essentially the same code but also comes with some test and benchmarking infrastructure. It seems that recent compiler versions on at least arm, arm64 and powerpc have a partial fix for this problem, but enabling "-fsched-pressure", but even with that fix they suffer from the issue to a certain degree. Some testing on arm64 shows that the time needed to hash a given amount of data is roughly proportional to the stack frame size here, which makes sense given that the wp512 implementation is doing lots of loads for table lookups, and the problem with the overly large stack is a result of doing a lot more loads and stores for spilled registers (as seen from inspecting the object code). Disabling -fschedule-insns consistently fixes the problem for wp512, in my collection of cross-compilers, the results are consistently better or identical when comparing the stack sizes in this function, though some architectures (notable x86) have schedule-insns disabled by default. The four columns are: default: -O2 press: -O2 -fsched-pressure nopress: -O2 -fschedule-insns -fno-sched-pressure nosched: -O2 -no-schedule-insns (disables sched-pressure) default press nopress nosched alpha-linux-gcc-4.9.3 1136 848 1136 176 am33_2.0-linux-gcc-4.9.3 2100 2076 2100 2104 arm-linux-gnueabi-gcc-4.9.3 848 848 1048 352 cris-linux-gcc-4.9.3 272 272 272 272 frv-linux-gcc-4.9.3 1128 1000 1128 280 hppa64-linux-gcc-4.9.3 1128 336 1128 184 hppa-linux-gcc-4.9.3 644 308 644 276 i386-linux-gcc-4.9.3 352 352 352 352 m32r-linux-gcc-4.9.3 720 656 720 268 microblaze-linux-gcc-4.9.3 1108 604 1108 256 mips64-linux-gcc-4.9.3 1328 592 1328 208 mips-linux-gcc-4.9.3 1096 624 1096 240 powerpc64-linux-gcc-4.9.3 1088 432 1088 160 powerpc-linux-gcc-4.9.3 1080 584 1080 224 s390-linux-gcc-4.9.3 456 456 624 360 sh3-linux-gcc-4.9.3 292 292 292 292 sparc64-linux-gcc-4.9.3 992 240 992 208 sparc-linux-gcc-4.9.3 680 592 680 312 x86_64-linux-gcc-4.9.3 224 240 272 224 xtensa-linux-gcc-4.9.3 1152 704 1152 304 aarch64-linux-gcc-7.0.0 224 224 1104 208 arm-linux-gnueabi-gcc-7.0.1 824 824 1048 352 mips-linux-gcc-7.0.0 1120 648 1120 272 x86_64-linux-gcc-7.0.1 240 240 304 240 arm-linux-gnueabi-gcc-4.4.7 840 392 arm-linux-gnueabi-gcc-4.5.4 784 728 784 320 arm-linux-gnueabi-gcc-4.6.4 736 728 736 304 arm-linux-gnueabi-gcc-4.7.4 944 784 944 352 arm-linux-gnueabi-gcc-4.8.5 464 464 760 352 arm-linux-gnueabi-gcc-4.9.3 848 848 1048 352 arm-linux-gnueabi-gcc-5.3.1 824 824 1064 336 arm-linux-gnueabi-gcc-6.1.1 808 808 1056 344 arm-linux-gnueabi-gcc-7.0.1 824 824 1048 352 Trying the same test for serpent-generic, the picture is a bit different, and while -fno-schedule-insns is generally better here than the default, -fsched-pressure wins overall, so I picked that instead. default press nopress nosched alpha-linux-gcc-4.9.3 1392 864 1392 960 am33_2.0-linux-gcc-4.9.3 536 524 536 528 arm-linux-gnueabi-gcc-4.9.3 552 552 776 536 cris-linux-gcc-4.9.3 528 528 528 528 frv-linux-gcc-4.9.3 536 400 536 504 hppa64-linux-gcc-4.9.3 524 208 524 480 hppa-linux-gcc-4.9.3 768 472 768 508 i386-linux-gcc-4.9.3 564 564 564 564 m32r-linux-gcc-4.9.3 712 576 712 532 microblaze-linux-gcc-4.9.3 724 392 724 512 mips64-linux-gcc-4.9.3 720 384 720 496 mips-linux-gcc-4.9.3 728 384 728 496 powerpc64-linux-gcc-4.9.3 704 304 704 480 powerpc-linux-gcc-4.9.3 704 296 704 480 s390-linux-gcc-4.9.3 560 560 592 536 sh3-linux-gcc-4.9.3 540 540 540 540 sparc64-linux-gcc-4.9.3 544 352 544 496 sparc-linux-gcc-4.9.3 544 344 544 496 x86_64-linux-gcc-4.9.3 528 536 576 528 xtensa-linux-gcc-4.9.3 752 544 752 544 aarch64-linux-gcc-7.0.0 432 432 656 480 arm-linux-gnueabi-gcc-7.0.1 616 616 808 536 mips-linux-gcc-7.0.0 720 464 720 488 x86_64-linux-gcc-7.0.1 536 528 600 536 arm-linux-gnueabi-gcc-4.4.7 592 440 arm-linux-gnueabi-gcc-4.5.4 776 448 776 544 arm-linux-gnueabi-gcc-4.6.4 776 448 776 544 arm-linux-gnueabi-gcc-4.7.4 768 448 768 544 arm-linux-gnueabi-gcc-4.8.5 488 488 776 544 arm-linux-gnueabi-gcc-4.9.3 552 552 776 536 arm-linux-gnueabi-gcc-5.3.1 552 552 776 536 arm-linux-gnueabi-gcc-6.1.1 560 560 776 536 arm-linux-gnueabi-gcc-7.0.1 616 616 808 536 I did not do any runtime tests with serpent, so it is possible that stack frame size does not directly correlate with runtime performance here and it actually makes things worse, but it's more likely to help here, and the reduced stack frame size is probably enough reason to apply the patch, especially given that the crypto code is often used in deep call chains. Link: https://kernelci.org/build/id/58797d7559b5149efdf6c3a9/logs/ Link: http://www.larc.usp.br/~pbarreto/WhirlpoolPage.html Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=11488 Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149 Cc: Ralf Baechle Signed-off-by: Arnd Bergmann Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crypto/Makefile b/crypto/Makefile index bd6a029094e6..9e52b3c528df 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -71,6 +71,7 @@ obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o obj-$(CONFIG_CRYPTO_SHA3) += sha3_generic.o obj-$(CONFIG_CRYPTO_WP512) += wp512.o +CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149 obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o obj-$(CONFIG_CRYPTO_ECB) += ecb.o @@ -94,6 +95,7 @@ obj-$(CONFIG_CRYPTO_BLOWFISH_COMMON) += blowfish_common.o obj-$(CONFIG_CRYPTO_TWOFISH) += twofish_generic.o obj-$(CONFIG_CRYPTO_TWOFISH_COMMON) += twofish_common.o obj-$(CONFIG_CRYPTO_SERPENT) += serpent_generic.o +CFLAGS_serpent_generic.o := $(call cc-option,-fsched-pressure) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149 obj-$(CONFIG_CRYPTO_AES) += aes_generic.o obj-$(CONFIG_CRYPTO_CAMELLIA) += camellia_generic.o obj-$(CONFIG_CRYPTO_CAST_COMMON) += cast_common.o From 5841e3d37db9ef1f9e154ae5059cd62e1a5aa934 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Jan 2017 15:29:48 +0100 Subject: [PATCH 222/357] MIPS: Update defconfigs for NF_CT_PROTO_DCCP/UDPLITE change commit 9ddc16ad8e0bc7742fc96d5aaabc5b8698512cd1 upstream. In linux-4.10-rc, NF_CT_PROTO_UDPLITE and NF_CT_PROTO_DCCP are bool symbols instead of tristate, and kernelci.org reports a bunch of warnings for this, like: arch/mips/configs/malta_kvm_guest_defconfig:63:warning: symbol value 'm' invalid for NF_CT_PROTO_UDPLITE arch/mips/configs/malta_defconfig:62:warning: symbol value 'm' invalid for NF_CT_PROTO_DCCP arch/mips/configs/malta_defconfig:63:warning: symbol value 'm' invalid for NF_CT_PROTO_UDPLITE arch/mips/configs/ip22_defconfig:70:warning: symbol value 'm' invalid for NF_CT_PROTO_DCCP arch/mips/configs/ip22_defconfig:71:warning: symbol value 'm' invalid for NF_CT_PROTO_UDPLITE This changes all the MIPS defconfigs with these symbols to have them built-in. Fixes: 9b91c96c5d1f ("netfilter: conntrack: built-in support for UDPlite") Fixes: c51d39010a1b ("netfilter: conntrack: built-in support for DCCP") Signed-off-by: Arnd Bergmann Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14999/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/configs/ip22_defconfig | 4 ++-- arch/mips/configs/malta_defconfig | 4 ++-- arch/mips/configs/malta_kvm_defconfig | 4 ++-- arch/mips/configs/malta_kvm_guest_defconfig | 4 ++-- arch/mips/configs/maltaup_xpa_defconfig | 4 ++-- arch/mips/configs/nlm_xlp_defconfig | 2 +- arch/mips/configs/nlm_xlr_defconfig | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig index 5d83ff755547..ec8e9684296d 100644 --- a/arch/mips/configs/ip22_defconfig +++ b/arch/mips/configs/ip22_defconfig @@ -67,8 +67,8 @@ CONFIG_NETFILTER_NETLINK_QUEUE=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CT_PROTO_DCCP=m -CONFIG_NF_CT_PROTO_UDPLITE=m +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_H323=m diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index 58d43f3c348d..078ecac071ab 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -59,8 +59,8 @@ CONFIG_NETFILTER=y CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CT_PROTO_DCCP=m -CONFIG_NF_CT_PROTO_UDPLITE=m +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_H323=m diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig index c8f7e2835840..e233f878afef 100644 --- a/arch/mips/configs/malta_kvm_defconfig +++ b/arch/mips/configs/malta_kvm_defconfig @@ -60,8 +60,8 @@ CONFIG_NETFILTER=y CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CT_PROTO_DCCP=m -CONFIG_NF_CT_PROTO_UDPLITE=m +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_H323=m diff --git a/arch/mips/configs/malta_kvm_guest_defconfig b/arch/mips/configs/malta_kvm_guest_defconfig index d2f54e55356c..fbe085c328ab 100644 --- a/arch/mips/configs/malta_kvm_guest_defconfig +++ b/arch/mips/configs/malta_kvm_guest_defconfig @@ -59,8 +59,8 @@ CONFIG_NETFILTER=y CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CT_PROTO_DCCP=m -CONFIG_NF_CT_PROTO_UDPLITE=m +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_H323=m diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig index 3d0d9cb9673f..2942610e4082 100644 --- a/arch/mips/configs/maltaup_xpa_defconfig +++ b/arch/mips/configs/maltaup_xpa_defconfig @@ -61,8 +61,8 @@ CONFIG_NETFILTER=y CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CT_PROTO_DCCP=m -CONFIG_NF_CT_PROTO_UDPLITE=m +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_H323=m diff --git a/arch/mips/configs/nlm_xlp_defconfig b/arch/mips/configs/nlm_xlp_defconfig index b496c25fced6..07d01827a973 100644 --- a/arch/mips/configs/nlm_xlp_defconfig +++ b/arch/mips/configs/nlm_xlp_defconfig @@ -110,7 +110,7 @@ CONFIG_NETFILTER=y CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CT_PROTO_UDPLITE=m +CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_H323=m diff --git a/arch/mips/configs/nlm_xlr_defconfig b/arch/mips/configs/nlm_xlr_defconfig index 8e99ad807a57..f59969acb724 100644 --- a/arch/mips/configs/nlm_xlr_defconfig +++ b/arch/mips/configs/nlm_xlr_defconfig @@ -90,7 +90,7 @@ CONFIG_NETFILTER=y CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CT_PROTO_UDPLITE=m +CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_H323=m From ad8387a602280c27f979b81d95b908d46be68901 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 17 Jan 2017 16:18:36 +0100 Subject: [PATCH 223/357] MIPS: VDSO: avoid duplicate CAC_BASE definition commit 1742ac265046f34223e06d5d283496f0291be259 upstream. vdso.h includes implicitly after defining CONFIG_32BITS. This defeats the override in mach-ip27/spaces.h, leading to a build error that shows up in kernelci.org: In file included from arch/mips/include/asm/mach-ip27/spaces.h:29:0, from arch/mips/include/asm/page.h:12, from arch/mips/vdso/vdso.h:26, from arch/mips/vdso/gettimeofday.c:11: arch/mips/include/asm/mach-generic/spaces.h:28:0: error: "CAC_BASE" redefined [-Werror] #define CAC_BASE _AC(0x80000000, UL) An earlier patch tried to make the second definition conditional, but that patch had the #ifdef in the wrong place, and would lead to another warning: arch/mips/include/asm/io.h: In function 'phys_to_virt': arch/mips/include/asm/io.h:138:9: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] For all I can tell, there is no other reason than vdso32 to ever include this file with CONFIG_32BITS set, and the vdso itself should never refer to the base addresses as it is running in user space, so adding an #ifdef here is safe. Link: https://patchwork.kernel.org/patch/9418187/ Fixes: 3ffc17d8768b ("MIPS: Adjust MIPS64 CAC_BASE to reflect Config.K0") Signed-off-by: Arnd Bergmann Cc: Paul Burton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15039/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/mach-ip27/spaces.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/mips/include/asm/mach-ip27/spaces.h b/arch/mips/include/asm/mach-ip27/spaces.h index 4775a1136a5b..24d5e31bcfa6 100644 --- a/arch/mips/include/asm/mach-ip27/spaces.h +++ b/arch/mips/include/asm/mach-ip27/spaces.h @@ -12,14 +12,16 @@ /* * IP27 uses the R10000's uncached attribute feature. Attribute 3 selects - * uncached memory addressing. + * uncached memory addressing. Hide the definitions on 32-bit compilation + * of the compat-vdso code. */ - +#ifdef CONFIG_64BIT #define HSPEC_BASE 0x9000000000000000 #define IO_BASE 0x9200000000000000 #define MSPEC_BASE 0x9400000000000000 #define UNCAC_BASE 0x9600000000000000 #define CAC_BASE 0xa800000000000000 +#endif #define TO_MSPEC(x) (MSPEC_BASE | ((x) & TO_PHYS_MASK)) #define TO_HSPEC(x) (HSPEC_BASE | ((x) & TO_PHYS_MASK)) From 23096c56787e27b5eddf38874593806df0f819f3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 3 Feb 2017 17:43:50 +0100 Subject: [PATCH 224/357] MIPS: ip27: Disable qlge driver in defconfig commit b617649468390713db1515ea79fc772d2eb897a8 upstream. One of the last remaining failures in kernelci.org is for a gcc bug: drivers/net/ethernet/qlogic/qlge/qlge_main.c:4819:1: error: insn does not satisfy its constraints: drivers/net/ethernet/qlogic/qlge/qlge_main.c:4819:1: internal compiler error: in extract_constrain_insn, at recog.c:2190 This is apparently broken in gcc-6 but fixed in gcc-7, and I cannot reproduce the problem here. However, it is clear that ip27_defconfig does not actually need this driver as the platform has only PCI-X but not PCIe, and the qlge adapter in turn is PCIe-only. The driver was originally enabled in 2010 along with lots of other drivers. Fixes: 59d302b342e5 ("MIPS: IP27: Make defconfig useful again.") Signed-off-by: Arnd Bergmann Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15197/ Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/configs/ip27_defconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig index 2b74aee320a1..a7979fd22ee3 100644 --- a/arch/mips/configs/ip27_defconfig +++ b/arch/mips/configs/ip27_defconfig @@ -205,7 +205,6 @@ CONFIG_MLX4_EN=m # CONFIG_MLX4_DEBUG is not set CONFIG_TEHUTI=m CONFIG_BNX2X=m -CONFIG_QLGE=m CONFIG_SFC=m CONFIG_BE2NET=m CONFIG_LIBERTAS_THINFIRM=m From aff853abd2988e16ed65cd0e15983aaf5312c7f6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Jan 2017 15:29:50 +0100 Subject: [PATCH 225/357] MIPS: Update ip27_defconfig for SCSI_DH change commit ea58fca1842a5dc410cae4167b01643db971a4e2 upstream. Since linux-4.3, SCSI_DH is a bool symbol, causing a warning in kernelci.org: arch/mips/configs/ip27_defconfig:136:warning: symbol value 'm' invalid for SCSI_DH This updates the defconfig to have the feature built-in. Fixes: 086b91d052eb ("scsi_dh: integrate into the core SCSI code") Signed-off-by: Arnd Bergmann Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15001/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/configs/ip27_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig index a7979fd22ee3..e582069b44fd 100644 --- a/arch/mips/configs/ip27_defconfig +++ b/arch/mips/configs/ip27_defconfig @@ -133,7 +133,7 @@ CONFIG_LIBFC=m CONFIG_SCSI_QLOGIC_1280=y CONFIG_SCSI_PMCRAID=m CONFIG_SCSI_BFA_FC=m -CONFIG_SCSI_DH=m +CONFIG_SCSI_DH=y CONFIG_SCSI_DH_RDAC=m CONFIG_SCSI_DH_HP_SW=m CONFIG_SCSI_DH_EMC=m From dd2ef28eb76e5cb573f18718275e607e4fc88283 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 17 Jan 2017 16:18:46 +0100 Subject: [PATCH 226/357] MIPS: ip22: Fix ip28 build for modern gcc commit 23ca9b522383d3b9b7991d8586db30118992af4a upstream. kernelci reports a failure of the ip28_defconfig build after upgrading its gcc version: arch/mips/sgi-ip22/Platform:29: *** gcc doesn't support needed option -mr10k-cache-barrier=store. Stop. The problem apparently is that the -mr10k-cache-barrier=store option is now rejected for CPUs other than r10k. Explicitly including the CPU in the check fixes this and is safe because both options were introduced in gcc-4.4. Signed-off-by: Arnd Bergmann Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15049/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/sgi-ip22/Platform | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/sgi-ip22/Platform b/arch/mips/sgi-ip22/Platform index b7a4b7e04c38..e8f6b3a42a48 100644 --- a/arch/mips/sgi-ip22/Platform +++ b/arch/mips/sgi-ip22/Platform @@ -25,7 +25,7 @@ endif # Simplified: what IP22 does at 128MB+ in ksegN, IP28 does at 512MB+ in xkphys # ifdef CONFIG_SGI_IP28 - ifeq ($(call cc-option-yn,-mr10k-cache-barrier=store), n) + ifeq ($(call cc-option-yn,-march=r10000 -mr10k-cache-barrier=store), n) $(error gcc doesn't support needed option -mr10k-cache-barrier=store) endif endif From 976e40d7f6fe15212a0ce6ef0f687130285fecab Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Jan 2017 15:29:49 +0100 Subject: [PATCH 227/357] MIPS: Update lemote2f_defconfig for CPU_FREQ_STAT change commit b3f6046186ef45acfeebc5a59c9fb45cefc685e7 upstream. Since linux-4.8, CPU_FREQ_STAT is a bool symbol, causing a warning in kernelci.org: arch/mips/configs/lemote2f_defconfig:42:warning: symbol value 'm' invalid for CPU_FREQ_STAT This updates the defconfig to have the feature built-in. Fixes: 1aefc75b2449 ("cpufreq: stats: Make the stats code non-modular") Signed-off-by: Arnd Bergmann Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15000/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/configs/lemote2f_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig index 5da76e0e120f..0cdb431bff80 100644 --- a/arch/mips/configs/lemote2f_defconfig +++ b/arch/mips/configs/lemote2f_defconfig @@ -39,7 +39,7 @@ CONFIG_HIBERNATION=y CONFIG_PM_STD_PARTITION="/dev/hda3" CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_DEBUG=y -CONFIG_CPU_FREQ_STAT=m +CONFIG_CPU_FREQ_STAT=y CONFIG_CPU_FREQ_STAT_DETAILS=y CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_POWERSAVE=m From 7b767f6b515d196b832d178035cb865c67937312 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 3 Feb 2017 10:49:17 +0100 Subject: [PATCH 228/357] mtd: pmcmsp: use kstrndup instead of kmalloc+strncpy commit 906b268477bc03daaa04f739844c120fe4dbc991 upstream. kernelci.org reports a warning for this driver, as it copies a local variable into a 'const char *' string: drivers/mtd/maps/pmcmsp-flash.c:149:30: warning: passing argument 1 of 'strncpy' discards 'const' qualifier from pointer target type [-Wdiscarded-qualifiers] Using kstrndup() simplifies the code and avoids the warning. Signed-off-by: Arnd Bergmann Acked-by: Marek Vasut Signed-off-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/maps/pmcmsp-flash.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/mtd/maps/pmcmsp-flash.c b/drivers/mtd/maps/pmcmsp-flash.c index f9fa3fad728e..2051f28ddac6 100644 --- a/drivers/mtd/maps/pmcmsp-flash.c +++ b/drivers/mtd/maps/pmcmsp-flash.c @@ -139,15 +139,13 @@ static int __init init_msp_flash(void) } msp_maps[i].bankwidth = 1; - msp_maps[i].name = kmalloc(7, GFP_KERNEL); + msp_maps[i].name = kstrndup(flash_name, 7, GFP_KERNEL); if (!msp_maps[i].name) { iounmap(msp_maps[i].virt); kfree(msp_parts[i]); goto cleanup_loop; } - msp_maps[i].name = strncpy(msp_maps[i].name, flash_name, 7); - for (j = 0; j < pcnt; j++) { part_name[5] = '0' + i; part_name[7] = '0' + j; From f34064186f0ed2fd2a47175a11339509f8fc0034 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Tue, 20 Dec 2016 19:12:46 +0100 Subject: [PATCH 229/357] MIPS: ralink: Cosmetic change to prom_init(). commit 9c48568b3692f1a56cbf1935e4eea835e6b185b1 upstream. Over the years the code has been changed various times leading to argc/argv being defined in a different function to where we actually use the variables. Clean this up by moving them to prom_init_cmdline(). Signed-off-by: John Crispin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14902/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/ralink/prom.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/mips/ralink/prom.c b/arch/mips/ralink/prom.c index 5a73c5e14221..23198c9050e5 100644 --- a/arch/mips/ralink/prom.c +++ b/arch/mips/ralink/prom.c @@ -30,8 +30,10 @@ const char *get_system_type(void) return soc_info.sys_type; } -static __init void prom_init_cmdline(int argc, char **argv) +static __init void prom_init_cmdline(void) { + int argc; + char **argv; int i; pr_debug("prom: fw_arg0=%08x fw_arg1=%08x fw_arg2=%08x fw_arg3=%08x\n", @@ -60,14 +62,11 @@ static __init void prom_init_cmdline(int argc, char **argv) void __init prom_init(void) { - int argc; - char **argv; - prom_soc_init(&soc_info); pr_info("SoC Type: %s\n", get_system_type()); - prom_init_cmdline(argc, argv); + prom_init_cmdline(); } void __init prom_free_prom_memory(void) From dd2419e1cec0498e688069ba84e5d90fd5651496 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 17 Jan 2017 16:18:41 +0100 Subject: [PATCH 230/357] MIPS: ralink: Remove unused timer functions commit d92240d12a9c010e0094bbc9280aae4a6be9a2d5 upstream. The functions were originally used for the module unload path, but are not referenced any more and just cause warnings: arch/mips/ralink/timer.c:104:13: error: 'rt_timer_disable' defined but not used [-Werror=unused-function] arch/mips/ralink/timer.c:74:13: error: 'rt_timer_free' defined but not used [-Werror=unused-function] Cc: Paul Gortmaker Fixes: 62ee73d284e7 ("MIPS: ralink: Make timer explicitly non-modular") Signed-off-by: Arnd Bergmann Cc: Paul Gortmaker Cc: John Crispin Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15041/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/ralink/timer.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/arch/mips/ralink/timer.c b/arch/mips/ralink/timer.c index 8077ff39bdea..d4469b20d176 100644 --- a/arch/mips/ralink/timer.c +++ b/arch/mips/ralink/timer.c @@ -71,11 +71,6 @@ static int rt_timer_request(struct rt_timer *rt) return err; } -static void rt_timer_free(struct rt_timer *rt) -{ - free_irq(rt->irq, rt); -} - static int rt_timer_config(struct rt_timer *rt, unsigned long divisor) { if (rt->timer_freq < divisor) @@ -101,15 +96,6 @@ static int rt_timer_enable(struct rt_timer *rt) return 0; } -static void rt_timer_disable(struct rt_timer *rt) -{ - u32 t; - - t = rt_timer_r32(rt, TIMER_REG_TMR0CTL); - t &= ~TMR0CTL_ENABLE; - rt_timer_w32(rt, TIMER_REG_TMR0CTL, t); -} - static int rt_timer_probe(struct platform_device *pdev) { struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0); From b7e968da04d77174477317b1b032be7f62ad856d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 17 Jan 2017 16:18:43 +0100 Subject: [PATCH 231/357] MIPS: ralink: Remove unused rt*_wdt_reset functions commit 886f9c69fc68f56ddea34d3de51ac1fc2ac8dfbc upstream. All pointers to these functions were removed, so now they produce warnings: arch/mips/ralink/rt305x.c:92:13: error: 'rt305x_wdt_reset' defined but not used [-Werror=unused-function] This removes the functions. If we need them again, the patch can be reverted later. Fixes: f576fb6a0700 ("MIPS: ralink: cleanup the soc specific pinmux data") Signed-off-by: Arnd Bergmann Cc: John Crispin Cc: Colin Ian King Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15044/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/ralink/rt288x.c | 10 ---------- arch/mips/ralink/rt305x.c | 11 ----------- arch/mips/ralink/rt3883.c | 10 ---------- 3 files changed, 31 deletions(-) diff --git a/arch/mips/ralink/rt288x.c b/arch/mips/ralink/rt288x.c index 285796e6d75c..2b76e3643869 100644 --- a/arch/mips/ralink/rt288x.c +++ b/arch/mips/ralink/rt288x.c @@ -40,16 +40,6 @@ static struct rt2880_pmx_group rt2880_pinmux_data_act[] = { { 0 } }; -static void rt288x_wdt_reset(void) -{ - u32 t; - - /* enable WDT reset output on pin SRAM_CS_N */ - t = rt_sysc_r32(SYSC_REG_CLKCFG); - t |= CLKCFG_SRAM_CS_N_WDT; - rt_sysc_w32(t, SYSC_REG_CLKCFG); -} - void __init ralink_clk_init(void) { unsigned long cpu_rate, wmac_rate = 40000000; diff --git a/arch/mips/ralink/rt305x.c b/arch/mips/ralink/rt305x.c index c8a28c4bf29e..e778e0b54ffb 100644 --- a/arch/mips/ralink/rt305x.c +++ b/arch/mips/ralink/rt305x.c @@ -89,17 +89,6 @@ static struct rt2880_pmx_group rt5350_pinmux_data[] = { { 0 } }; -static void rt305x_wdt_reset(void) -{ - u32 t; - - /* enable WDT reset output on pin SRAM_CS_N */ - t = rt_sysc_r32(SYSC_REG_SYSTEM_CONFIG); - t |= RT305X_SYSCFG_SRAM_CS0_MODE_WDT << - RT305X_SYSCFG_SRAM_CS0_MODE_SHIFT; - rt_sysc_w32(t, SYSC_REG_SYSTEM_CONFIG); -} - static unsigned long rt5350_get_mem_size(void) { void __iomem *sysc = (void __iomem *) KSEG1ADDR(RT305X_SYSC_BASE); diff --git a/arch/mips/ralink/rt3883.c b/arch/mips/ralink/rt3883.c index 4cef9162bd9b..3e0aa09c6b55 100644 --- a/arch/mips/ralink/rt3883.c +++ b/arch/mips/ralink/rt3883.c @@ -63,16 +63,6 @@ static struct rt2880_pmx_group rt3883_pinmux_data[] = { { 0 } }; -static void rt3883_wdt_reset(void) -{ - u32 t; - - /* enable WDT reset output on GPIO 2 */ - t = rt_sysc_r32(RT3883_SYSC_REG_SYSCFG1); - t |= RT3883_SYSCFG1_GPIO2_AS_WDT_OUT; - rt_sysc_w32(t, RT3883_SYSC_REG_SYSCFG1); -} - void __init ralink_clk_init(void) { unsigned long cpu_rate, sys_rate; From 04275d2a8af3269e2b0e04512a6b261dd3245545 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 18 Jan 2017 15:52:53 +0100 Subject: [PATCH 232/357] bcm63xx_enet: avoid uninitialized variable warning commit df384d435a5c034c735df3d9ea87a03172c59b56 upstream. gcc-7 and probably earlier versions get confused by this function and print a harmless warning: drivers/net/ethernet/broadcom/bcm63xx_enet.c: In function 'bcm_enet_open': drivers/net/ethernet/broadcom/bcm63xx_enet.c:1130:3: error: 'phydev' may be used uninitialized in this function [-Werror=maybe-uninitialized] This adds an initialization for the 'phydev' variable when it is unused and changes the check to test for that NULL pointer to make it clear that we always pass a valid pointer here. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 537090952c45..08d91efceed0 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -913,6 +913,8 @@ static int bcm_enet_open(struct net_device *dev) priv->old_link = 0; priv->old_duplex = -1; priv->old_pause = -1; + } else { + phydev = NULL; } /* mask all interrupts and request them */ @@ -1083,7 +1085,7 @@ static int bcm_enet_open(struct net_device *dev) enet_dmac_writel(priv, priv->dma_chan_int_mask, ENETDMAC_IRMASK, priv->tx_chan); - if (priv->has_phy) + if (phydev) phy_start(phydev); else bcm_enet_adjust_link(dev); @@ -1126,7 +1128,7 @@ out_freeirq: free_irq(dev->irq, dev); out_phy_disconnect: - if (priv->has_phy) + if (phydev) phy_disconnect(phydev); return ret; From b72ae5ca7a8be7cfa64c4f121b56cfb8ef088410 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Jan 2017 14:20:54 +0100 Subject: [PATCH 233/357] cpmac: remove hopeless #warning commit d43e6fb4ac4abfe4ef7c102833ed02330ad701e0 upstream. The #warning was present 10 years ago when the driver first got merged. As the platform is rather obsolete by now, it seems very unlikely that the warning will cause anyone to fix the code properly. kernelci.org reports the warning for every build in the meantime, so I think it's better to just turn it into a code comment to reduce noise. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ti/cpmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c index 28097be2ff28..5127b7e48fcb 100644 --- a/drivers/net/ethernet/ti/cpmac.c +++ b/drivers/net/ethernet/ti/cpmac.c @@ -1211,7 +1211,7 @@ int cpmac_init(void) goto fail_alloc; } -#warning FIXME: unhardcode gpio&reset bits + /* FIXME: unhardcode gpio&reset bits */ ar7_gpio_disable(26); ar7_gpio_disable(27); ar7_device_reset(AR7_RESET_BIT_CPMAC_LO); From 8bb208d02accfe50cee2a9b5db3ea9da49a208b3 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Wed, 28 Sep 2016 22:55:54 -0400 Subject: [PATCH 234/357] tracing: Add #undef to fix compile error commit bf7165cfa23695c51998231c4efa080fe1d3548d upstream. There are several trace include files that define TRACE_INCLUDE_FILE. Include several of them in the same .c file (as I currently have in some code I am working on), and the compile will blow up with a "warning: "TRACE_INCLUDE_FILE" redefined #define TRACE_INCLUDE_FILE syscalls" Every other include file in include/trace/events/ avoids that issue by having a #undef TRACE_INCLUDE_FILE before the #define; syscalls.h should have one, too. Link: http://lkml.kernel.org/r/20160928225554.13bd7ac6@annuminas.surriel.com Fixes: b8007ef74222 ("tracing: Separate raw syscall from syscall tracer") Signed-off-by: Rik van Riel Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- include/trace/events/syscalls.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h index 14e49c798135..b35533b94277 100644 --- a/include/trace/events/syscalls.h +++ b/include/trace/events/syscalls.h @@ -1,5 +1,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM raw_syscalls +#undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE syscalls #if !defined(_TRACE_EVENTS_SYSCALLS_H) || defined(TRACE_HEADER_MULTI_READ) From ee6f7ee1e4cdb0098fee4593ddf11ca6028abef2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 5 Mar 2017 15:03:22 -0600 Subject: [PATCH 235/357] ucount: Remove the atomicity from ucount->count commit 040757f738e13caaa9c5078bca79aa97e11dde88 upstream. Always increment/decrement ucount->count under the ucounts_lock. The increments are there already and moving the decrements there means the locking logic of the code is simpler. This simplification in the locking logic fixes a race between put_ucounts and get_ucounts that could result in a use-after-free because the count could go zero then be found by get_ucounts and then be freed by put_ucounts. A bug presumably this one was found by a combination of syzkaller and KASAN. JongWhan Kim reported the syzkaller failure and Dmitry Vyukov spotted the race in the code. Fixes: f6b2db1a3e8d ("userns: Make the count of user namespaces per user") Reported-by: JongHwan Kim Reported-by: Dmitry Vyukov Reviewed-by: Andrei Vagin Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- include/linux/user_namespace.h | 2 +- kernel/ucount.c | 18 +++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index eb209d4523f5..dc797739f164 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -65,7 +65,7 @@ struct ucounts { struct hlist_node node; struct user_namespace *ns; kuid_t uid; - atomic_t count; + int count; atomic_t ucount[UCOUNT_COUNTS]; }; diff --git a/kernel/ucount.c b/kernel/ucount.c index 4bbd38ec3788..f4ac18509ecf 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -139,7 +139,7 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) new->ns = ns; new->uid = uid; - atomic_set(&new->count, 0); + new->count = 0; spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); @@ -150,8 +150,10 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) ucounts = new; } } - if (!atomic_add_unless(&ucounts->count, 1, INT_MAX)) + if (ucounts->count == INT_MAX) ucounts = NULL; + else + ucounts->count += 1; spin_unlock_irq(&ucounts_lock); return ucounts; } @@ -160,13 +162,15 @@ static void put_ucounts(struct ucounts *ucounts) { unsigned long flags; - if (atomic_dec_and_test(&ucounts->count)) { - spin_lock_irqsave(&ucounts_lock, flags); + spin_lock_irqsave(&ucounts_lock, flags); + ucounts->count -= 1; + if (!ucounts->count) hlist_del_init(&ucounts->node); - spin_unlock_irqrestore(&ucounts_lock, flags); + else + ucounts = NULL; + spin_unlock_irqrestore(&ucounts_lock, flags); - kfree(ucounts); - } + kfree(ucounts); } static inline bool atomic_inc_below(atomic_t *v, int u) From 56d91e106b13f81fea9a6571240ab89eba6d9fe8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 1 Mar 2017 19:05:54 +0000 Subject: [PATCH 236/357] efi/arm: Fix boot crash with CONFIG_CPUMASK_OFFSTACK=y commit d1eb98143c56f24fef125f5bbed49ae0b52fb7d6 upstream. On ARM and arm64, we use a dedicated mm_struct to map the UEFI Runtime Services regions, which allows us to map those regions on demand, and in a way that is guaranteed to be compatible with incoming kernels across kexec. As it turns out, we don't fully initialize the mm_struct in the same way as process mm_structs are initialized on fork(), which results in the following crash on ARM if CONFIG_CPUMASK_OFFSTACK=y is enabled: ... EFI Variables Facility v0.08 2004-May-17 Unable to handle kernel NULL pointer dereference at virtual address 00000000 [...] Process swapper/0 (pid: 1) ... __memzero() check_and_switch_context() virt_efi_get_next_variable() efivar_init() efivars_sysfs_init() do_one_initcall() ... This is due to a missing call to mm_init_cpumask(), so add it. Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1488395154-29786-1-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/arm-runtime.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 7c75a8d9091a..6bdf39e1e385 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -65,6 +65,7 @@ static bool __init efi_virtmap_init(void) bool systab_found; efi_mm.pgd = pgd_alloc(&efi_mm); + mm_init_cpumask(&efi_mm); init_new_context(NULL, &efi_mm); systab_found = false; From 06996254a605913cd7c1927d0e8a89b5138e110d Mon Sep 17 00:00:00 2001 From: Jonathan McDowell Date: Wed, 15 Feb 2017 18:29:15 -0200 Subject: [PATCH 237/357] dw2102: don't do DMA on stack commit 606142af57dad981b78707234cfbd15f9f7b7125 upstream. On Kernel 4.9, WARNINGs about doing DMA on stack are hit at the dw2102 driver: one in su3000_power_ctrl() and the other in tt_s2_4600_frontend_attach(). Both were due to the use of buffers on the stack as parameters to dvb_usb_generic_rw() and the resulting attempt to do DMA with them. The device was non-functional as a result. So, switch this driver over to use a buffer within the device state structure, as has been done with other DVB-USB drivers. Tested with TechnoTrend TT-connect S2-4600. [mchehab@osg.samsung.com: fixed a warning at su3000_i2c_transfer() that state var were dereferenced before check 'd'] Signed-off-by: Jonathan McDowell Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb/dw2102.c | 242 +++++++++++++++++------------ 1 file changed, 144 insertions(+), 98 deletions(-) diff --git a/drivers/media/usb/dvb-usb/dw2102.c b/drivers/media/usb/dvb-usb/dw2102.c index 2c720cb2fb00..c3e67347a977 100644 --- a/drivers/media/usb/dvb-usb/dw2102.c +++ b/drivers/media/usb/dvb-usb/dw2102.c @@ -68,6 +68,7 @@ struct dw2102_state { u8 initialized; u8 last_lock; + u8 data[MAX_XFER_SIZE + 4]; struct i2c_client *i2c_client_demod; struct i2c_client *i2c_client_tuner; @@ -662,62 +663,72 @@ static int su3000_i2c_transfer(struct i2c_adapter *adap, struct i2c_msg msg[], int num) { struct dvb_usb_device *d = i2c_get_adapdata(adap); - u8 obuf[0x40], ibuf[0x40]; + struct dw2102_state *state; if (!d) return -ENODEV; + + state = d->priv; + if (mutex_lock_interruptible(&d->i2c_mutex) < 0) return -EAGAIN; + if (mutex_lock_interruptible(&d->data_mutex) < 0) { + mutex_unlock(&d->i2c_mutex); + return -EAGAIN; + } switch (num) { case 1: switch (msg[0].addr) { case SU3000_STREAM_CTRL: - obuf[0] = msg[0].buf[0] + 0x36; - obuf[1] = 3; - obuf[2] = 0; - if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 0, 0) < 0) + state->data[0] = msg[0].buf[0] + 0x36; + state->data[1] = 3; + state->data[2] = 0; + if (dvb_usb_generic_rw(d, state->data, 3, + state->data, 0, 0) < 0) err("i2c transfer failed."); break; case DW2102_RC_QUERY: - obuf[0] = 0x10; - if (dvb_usb_generic_rw(d, obuf, 1, ibuf, 2, 0) < 0) + state->data[0] = 0x10; + if (dvb_usb_generic_rw(d, state->data, 1, + state->data, 2, 0) < 0) err("i2c transfer failed."); - msg[0].buf[1] = ibuf[0]; - msg[0].buf[0] = ibuf[1]; + msg[0].buf[1] = state->data[0]; + msg[0].buf[0] = state->data[1]; break; default: /* always i2c write*/ - obuf[0] = 0x08; - obuf[1] = msg[0].addr; - obuf[2] = msg[0].len; + state->data[0] = 0x08; + state->data[1] = msg[0].addr; + state->data[2] = msg[0].len; - memcpy(&obuf[3], msg[0].buf, msg[0].len); + memcpy(&state->data[3], msg[0].buf, msg[0].len); - if (dvb_usb_generic_rw(d, obuf, msg[0].len + 3, - ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, msg[0].len + 3, + state->data, 1, 0) < 0) err("i2c transfer failed."); } break; case 2: /* always i2c read */ - obuf[0] = 0x09; - obuf[1] = msg[0].len; - obuf[2] = msg[1].len; - obuf[3] = msg[0].addr; - memcpy(&obuf[4], msg[0].buf, msg[0].len); + state->data[0] = 0x09; + state->data[1] = msg[0].len; + state->data[2] = msg[1].len; + state->data[3] = msg[0].addr; + memcpy(&state->data[4], msg[0].buf, msg[0].len); - if (dvb_usb_generic_rw(d, obuf, msg[0].len + 4, - ibuf, msg[1].len + 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, msg[0].len + 4, + state->data, msg[1].len + 1, 0) < 0) err("i2c transfer failed."); - memcpy(msg[1].buf, &ibuf[1], msg[1].len); + memcpy(msg[1].buf, &state->data[1], msg[1].len); break; default: warn("more than 2 i2c messages at a time is not handled yet."); break; } + mutex_unlock(&d->data_mutex); mutex_unlock(&d->i2c_mutex); return num; } @@ -845,17 +856,23 @@ static int su3000_streaming_ctrl(struct dvb_usb_adapter *adap, int onoff) static int su3000_power_ctrl(struct dvb_usb_device *d, int i) { struct dw2102_state *state = (struct dw2102_state *)d->priv; - u8 obuf[] = {0xde, 0}; + int ret = 0; info("%s: %d, initialized %d", __func__, i, state->initialized); if (i && !state->initialized) { + mutex_lock(&d->data_mutex); + + state->data[0] = 0xde; + state->data[1] = 0; + state->initialized = 1; /* reset board */ - return dvb_usb_generic_rw(d, obuf, 2, NULL, 0, 0); + ret = dvb_usb_generic_rw(d, state->data, 2, NULL, 0, 0); + mutex_unlock(&d->data_mutex); } - return 0; + return ret; } static int su3000_read_mac_address(struct dvb_usb_device *d, u8 mac[6]) @@ -1310,49 +1327,57 @@ static int prof_7500_frontend_attach(struct dvb_usb_adapter *d) return 0; } -static int su3000_frontend_attach(struct dvb_usb_adapter *d) +static int su3000_frontend_attach(struct dvb_usb_adapter *adap) { - u8 obuf[3] = { 0xe, 0x80, 0 }; - u8 ibuf[] = { 0 }; + struct dvb_usb_device *d = adap->dev; + struct dw2102_state *state = d->priv; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + mutex_lock(&d->data_mutex); + + state->data[0] = 0xe; + state->data[1] = 0x80; + state->data[2] = 0; + + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x02; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x02; + state->data[2] = 1; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); msleep(300); - obuf[0] = 0xe; - obuf[1] = 0x83; - obuf[2] = 0; + state->data[0] = 0xe; + state->data[1] = 0x83; + state->data[2] = 0; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x83; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x83; + state->data[2] = 1; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0x51; + state->data[0] = 0x51; - if (dvb_usb_generic_rw(d->dev, obuf, 1, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0) err("command 0x51 transfer failed."); - d->fe_adap[0].fe = dvb_attach(ds3000_attach, &su3000_ds3000_config, - &d->dev->i2c_adap); - if (d->fe_adap[0].fe == NULL) + mutex_unlock(&d->data_mutex); + + adap->fe_adap[0].fe = dvb_attach(ds3000_attach, &su3000_ds3000_config, + &d->i2c_adap); + if (adap->fe_adap[0].fe == NULL) return -EIO; - if (dvb_attach(ts2020_attach, d->fe_adap[0].fe, + if (dvb_attach(ts2020_attach, adap->fe_adap[0].fe, &dw2104_ts2020_config, - &d->dev->i2c_adap)) { + &d->i2c_adap)) { info("Attached DS3000/TS2020!"); return 0; } @@ -1361,47 +1386,55 @@ static int su3000_frontend_attach(struct dvb_usb_adapter *d) return -EIO; } -static int t220_frontend_attach(struct dvb_usb_adapter *d) +static int t220_frontend_attach(struct dvb_usb_adapter *adap) { - u8 obuf[3] = { 0xe, 0x87, 0 }; - u8 ibuf[] = { 0 }; + struct dvb_usb_device *d = adap->dev; + struct dw2102_state *state = d->priv; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + mutex_lock(&d->data_mutex); + + state->data[0] = 0xe; + state->data[1] = 0x87; + state->data[2] = 0x0; + + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x86; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x86; + state->data[2] = 1; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x80; - obuf[2] = 0; + state->data[0] = 0xe; + state->data[1] = 0x80; + state->data[2] = 0; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); msleep(50); - obuf[0] = 0xe; - obuf[1] = 0x80; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x80; + state->data[2] = 1; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0x51; + state->data[0] = 0x51; - if (dvb_usb_generic_rw(d->dev, obuf, 1, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0) err("command 0x51 transfer failed."); - d->fe_adap[0].fe = dvb_attach(cxd2820r_attach, &cxd2820r_config, - &d->dev->i2c_adap, NULL); - if (d->fe_adap[0].fe != NULL) { - if (dvb_attach(tda18271_attach, d->fe_adap[0].fe, 0x60, - &d->dev->i2c_adap, &tda18271_config)) { + mutex_unlock(&d->data_mutex); + + adap->fe_adap[0].fe = dvb_attach(cxd2820r_attach, &cxd2820r_config, + &d->i2c_adap, NULL); + if (adap->fe_adap[0].fe != NULL) { + if (dvb_attach(tda18271_attach, adap->fe_adap[0].fe, 0x60, + &d->i2c_adap, &tda18271_config)) { info("Attached TDA18271HD/CXD2820R!"); return 0; } @@ -1411,23 +1444,30 @@ static int t220_frontend_attach(struct dvb_usb_adapter *d) return -EIO; } -static int m88rs2000_frontend_attach(struct dvb_usb_adapter *d) +static int m88rs2000_frontend_attach(struct dvb_usb_adapter *adap) { - u8 obuf[] = { 0x51 }; - u8 ibuf[] = { 0 }; + struct dvb_usb_device *d = adap->dev; + struct dw2102_state *state = d->priv; - if (dvb_usb_generic_rw(d->dev, obuf, 1, ibuf, 1, 0) < 0) + mutex_lock(&d->data_mutex); + + state->data[0] = 0x51; + + if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0) err("command 0x51 transfer failed."); - d->fe_adap[0].fe = dvb_attach(m88rs2000_attach, &s421_m88rs2000_config, - &d->dev->i2c_adap); + mutex_unlock(&d->data_mutex); - if (d->fe_adap[0].fe == NULL) + adap->fe_adap[0].fe = dvb_attach(m88rs2000_attach, + &s421_m88rs2000_config, + &d->i2c_adap); + + if (adap->fe_adap[0].fe == NULL) return -EIO; - if (dvb_attach(ts2020_attach, d->fe_adap[0].fe, + if (dvb_attach(ts2020_attach, adap->fe_adap[0].fe, &dw2104_ts2020_config, - &d->dev->i2c_adap)) { + &d->i2c_adap)) { info("Attached RS2000/TS2020!"); return 0; } @@ -1440,44 +1480,50 @@ static int tt_s2_4600_frontend_attach(struct dvb_usb_adapter *adap) { struct dvb_usb_device *d = adap->dev; struct dw2102_state *state = d->priv; - u8 obuf[3] = { 0xe, 0x80, 0 }; - u8 ibuf[] = { 0 }; struct i2c_adapter *i2c_adapter; struct i2c_client *client; struct i2c_board_info board_info; struct m88ds3103_platform_data m88ds3103_pdata = {}; struct ts2020_config ts2020_config = {}; - if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0) + mutex_lock(&d->data_mutex); + + state->data[0] = 0xe; + state->data[1] = 0x80; + state->data[2] = 0x0; + + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x02; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x02; + state->data[2] = 1; - if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); msleep(300); - obuf[0] = 0xe; - obuf[1] = 0x83; - obuf[2] = 0; + state->data[0] = 0xe; + state->data[1] = 0x83; + state->data[2] = 0; - if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x83; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x83; + state->data[2] = 1; - if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0x51; + state->data[0] = 0x51; - if (dvb_usb_generic_rw(d, obuf, 1, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0) err("command 0x51 transfer failed."); + mutex_unlock(&d->data_mutex); + /* attach demod */ m88ds3103_pdata.clk = 27000000; m88ds3103_pdata.i2c_wr_max = 33; From ac4666a7fd1aab0b4e60240ab84f9fd839900669 Mon Sep 17 00:00:00 2001 From: Qi Hou Date: Fri, 3 Mar 2017 15:57:11 +0800 Subject: [PATCH 238/357] i2c: add missing of_node_put in i2c_mux_del_adapters commit 2e1e4949f9dfb053122785cd73540bb1e61f768b upstream. Refcount of of_node is increased with of_node_get() in i2c_mux_add_adapter(). It must be decreased with of_node_put() in i2c_mux_del_adapters(). Signed-off-by: Qi Hou Reviewed-by: Zhang Xiao Signed-off-by: Peter Rosin Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/i2c-mux.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c index 83768e85a919..2178266bca79 100644 --- a/drivers/i2c/i2c-mux.c +++ b/drivers/i2c/i2c-mux.c @@ -429,6 +429,7 @@ void i2c_mux_del_adapters(struct i2c_mux_core *muxc) while (muxc->num_adapters) { struct i2c_adapter *adap = muxc->adapter[--muxc->num_adapters]; struct i2c_mux_priv *priv = adap->algo_data; + struct device_node *np = adap->dev.of_node; muxc->adapter[muxc->num_adapters] = NULL; @@ -438,6 +439,7 @@ void i2c_mux_del_adapters(struct i2c_mux_core *muxc) sysfs_remove_link(&priv->adap.dev.kobj, "mux_device"); i2c_del_adapter(adap); + of_node_put(np); kfree(priv); } } From bc8d2eefe63f6f0f4592ab735d4c95c58956dcf9 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 14 Feb 2017 14:46:42 +0530 Subject: [PATCH 239/357] powerpc: Emulation support for load/store instructions on LE commit e148bd17f48bd17fca2f4f089ec879fa6e47e34c upstream. emulate_step() uses a number of underlying kernel functions that were initially not enabled for LE. This has been rectified since. So, fix emulate_step() for LE for the corresponding instructions. Reported-by: Anton Blanchard Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/lib/sstep.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 3362299b1859..6ca3b902f7b9 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1807,8 +1807,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case LARX: - if (regs->msr & MSR_LE) - return 0; if (op.ea & (size - 1)) break; /* can't handle misaligned */ err = -EFAULT; @@ -1832,8 +1830,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case STCX: - if (regs->msr & MSR_LE) - return 0; if (op.ea & (size - 1)) break; /* can't handle misaligned */ err = -EFAULT; @@ -1859,8 +1855,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case LOAD: - if (regs->msr & MSR_LE) - return 0; err = read_mem(®s->gpr[op.reg], op.ea, size, regs); if (!err) { if (op.type & SIGNEXT) @@ -1872,8 +1866,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef CONFIG_PPC_FPU case LOAD_FP: - if (regs->msr & MSR_LE) - return 0; if (size == 4) err = do_fp_load(op.reg, do_lfs, op.ea, size, regs); else @@ -1882,15 +1874,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #endif #ifdef CONFIG_ALTIVEC case LOAD_VMX: - if (regs->msr & MSR_LE) - return 0; err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs); goto ldst_done; #endif #ifdef CONFIG_VSX case LOAD_VSX: - if (regs->msr & MSR_LE) - return 0; err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs); goto ldst_done; #endif @@ -1913,8 +1901,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case STORE: - if (regs->msr & MSR_LE) - return 0; if ((op.type & UPDATE) && size == sizeof(long) && op.reg == 1 && op.update_reg == 1 && !(regs->msr & MSR_PR) && @@ -1927,8 +1913,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef CONFIG_PPC_FPU case STORE_FP: - if (regs->msr & MSR_LE) - return 0; if (size == 4) err = do_fp_store(op.reg, do_stfs, op.ea, size, regs); else @@ -1937,15 +1921,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #endif #ifdef CONFIG_ALTIVEC case STORE_VMX: - if (regs->msr & MSR_LE) - return 0; err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs); goto ldst_done; #endif #ifdef CONFIG_VSX case STORE_VSX: - if (regs->msr & MSR_LE) - return 0; err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs); goto ldst_done; #endif From 1ced52ead2441c493afd80209dec5903f6cdc04a Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Fri, 3 Mar 2017 11:58:44 +1100 Subject: [PATCH 240/357] powerpc/xics: Work around limitations of OPAL XICS priority handling commit a69e2fb70350a66f91175cd2625f1e8215c5b6e9 upstream. The CPPR (Current Processor Priority Register) of a XICS interrupt presentation controller contains a value N, such that only interrupts with a priority "more favoured" than N will be received by the CPU, where "more favoured" means "less than". So if the CPPR has the value 5 then only interrupts with a priority of 0-4 inclusive will be received. In theory the CPPR can support a value of 0 to 255 inclusive. In practice Linux only uses values of 0, 4, 5 and 0xff. Setting the CPPR to 0 rejects all interrupts, setting it to 0xff allows all interrupts. The values 4 and 5 are used to differentiate IPIs from external interrupts. Setting the CPPR to 5 allows IPIs to be received but not external interrupts. The CPPR emulation in the OPAL XICS implementation only directly supports priorities 0 and 0xff. All other priorities are considered equivalent, and mapped to a single priority value internally. This means when using icp-opal we can not allow IPIs but not externals. This breaks Linux's use of priority values when a CPU is hot unplugged. After migrating IRQs away from the CPU that is being offlined, we set the priority to 5, meaning we still want the offline CPU to receive IPIs. But the effect of the OPAL XICS emulation's use of a single priority value is that all interrupts are rejected by the CPU. With the CPU offline, and not receiving IPIs, we may not be able to wake it up to bring it back online. The first part of the fix is in icp_opal_set_cpu_priority(). CPPR values of 0 to 4 inclusive will correctly cause all interrupts to be rejected, so we pass those CPPR values through to OPAL. However if we are called with a CPPR of 5 or greater, the caller is expecting to be able to allow IPIs but not external interrupts. We know this doesn't work, so instead of rejecting all interrupts we choose the opposite which is to allow all interrupts. This is still not correct behaviour, but we know for the only existing caller (xics_migrate_irqs_away()), that it is the better option. The other part of the fix is in xics_migrate_irqs_away(). Instead of setting priority (CPPR) to 0, and then back to 5 before migrating IRQs, we migrate the IRQs before setting the priority back to 5. This should have no effect on an ICP backend with a working set_priority(), and on icp-opal it means we will keep all interrupts blocked until after we've finished doing the IRQ migration. Additionally we wait for 5ms after doing the migration to make sure there are no IRQs in flight. Fixes: d74361881f0d ("powerpc/xics: Add ICP OPAL backend") Suggested-by: Michael Ellerman Reported-by: Vaidyanathan Srinivasan Tested-by: Vaidyanathan Srinivasan Signed-off-by: Balbir Singh [mpe: Rewrote comments and change log, change delay to 5ms] Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/sysdev/xics/icp-opal.c | 10 ++++++++++ arch/powerpc/sysdev/xics/xics-common.c | 17 ++++++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c index c96c0cb95d87..32c46b424dd0 100644 --- a/arch/powerpc/sysdev/xics/icp-opal.c +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -91,6 +91,16 @@ static unsigned int icp_opal_get_irq(void) static void icp_opal_set_cpu_priority(unsigned char cppr) { + /* + * Here be dragons. The caller has asked to allow only IPI's and not + * external interrupts. But OPAL XIVE doesn't support that. So instead + * of allowing no interrupts allow all. That's still not right, but + * currently the only caller who does this is xics_migrate_irqs_away() + * and it works in that case. + */ + if (cppr >= DEFAULT_PRIORITY) + cppr = LOWEST_PRIORITY; + xics_set_base_cppr(cppr); opal_int_set_cppr(cppr); iosync(); diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 69d858e51ac7..23efe4e42172 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -198,9 +199,6 @@ void xics_migrate_irqs_away(void) /* Remove ourselves from the global interrupt queue */ xics_set_cpu_giq(xics_default_distrib_server, 0); - /* Allow IPIs again... */ - icp_ops->set_priority(DEFAULT_PRIORITY); - for_each_irq_desc(virq, desc) { struct irq_chip *chip; long server; @@ -255,6 +253,19 @@ void xics_migrate_irqs_away(void) unlock: raw_spin_unlock_irqrestore(&desc->lock, flags); } + + /* Allow "sufficient" time to drop any inflight IRQ's */ + mdelay(5); + + /* + * Allow IPIs again. This is done at the very end, after migrating all + * interrupts, the expectation is that we'll only get woken up by an IPI + * interrupt beyond this point, but leave externals masked just to be + * safe. If we're using icp-opal this may actually allow all + * interrupts anyway, but that should be OK. + */ + icp_ops->set_priority(DEFAULT_PRIORITY); + } #endif /* CONFIG_HOTPLUG_CPU */ From ed99f5a09cc6ef662fd29a4ad50b221608a635a8 Mon Sep 17 00:00:00 2001 From: Ethan Zhao Date: Mon, 27 Feb 2017 17:08:44 +0900 Subject: [PATCH 241/357] PCI: Prevent VPD access for QLogic ISP2722 commit 0d5370d1d85251e5893ab7c90a429464de2e140b upstream. QLogic ISP2722-based 16/32Gb Fibre Channel to PCIe Adapter has the VPD access issue too, while read the common pci-sysfs access interface shown as /sys/devices/pci0000:00/0000:00:03.2/0000:0b:00.0/vpd with simple 'cat' could cause system hang and panic: Kernel panic - not syncing: An NMI occurred. Depending on your system the reason for the NMI is logged in any one of the following resources: 1. Integrated Management Log (IML) 2. OA Syslog 3. OA Forward Progress Log 4. iLO Event Log CPU: 0 PID: 15070 Comm: udevadm Not tainted 4.1.12 Hardware name: HP ProLiant DL380 Gen9/ProLiant DL380 Gen9, BIOS P89 12/27/2015 0000000000000086 000000007f0cdf51 ffff880c4fa05d58 ffffffff817193de ffffffffa00b42d8 0000000000000075 ffff880c4fa05dd8 ffffffff81714072 0000000000000008 ffff880c4fa05de8 ffff880c4fa05d88 000000007f0cdf51 Call Trace: [] dump_stack+0x63/0x81 [] panic+0xd0/0x20e [] hpwdt_pretimeout+0xdd/0xe0 [hpwdt] [] ? sched_clock+0x9/0x10 [] nmi_handle+0x91/0x170 [] ? nmi_handle+0x9c/0x170 [] io_check_error+0x1e/0xa0 [] default_do_nmi+0x99/0x140 [] do_nmi+0xf4/0x170 [] end_repeat_nmi+0x1a/0x1e [] ? pci_conf1_read+0xeb/0x120 [] ? pci_conf1_read+0xeb/0x120 [] ? pci_conf1_read+0xeb/0x120 <> [] raw_pci_read+0x23/0x40 [] pci_read+0x2c/0x30 [] pci_user_read_config_word+0x72/0x110 [] pci_vpd_pci22_wait+0x96/0x130 [] pci_vpd_pci22_read+0xdb/0x1a0 [] pci_read_vpd+0x20/0x30 [] read_vpd_attr+0x30/0x40 [] sysfs_kf_bin_read+0x47/0x70 [] kernfs_fop_read+0xae/0x180 [] __vfs_read+0x37/0x100 [] ? security_file_permission+0x84/0xa0 [] ? rw_verify_area+0x56/0xe0 [] vfs_read+0x86/0x140 [] SyS_read+0x55/0xd0 [] system_call_fastpath+0x12/0x71 Shutting down cpus with NMI Kernel Offset: disabled drm_kms_helper: panic occurred, switching back to text console So blacklist the access to its VPD. Signed-off-by: Ethan Zhao Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 3a035e073889..087a218a875f 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2173,6 +2173,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005d, quirk_blacklist_vpd); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005f, quirk_blacklist_vpd); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, PCI_ANY_ID, quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_QLOGIC, 0x2261, quirk_blacklist_vpd); /* * For Broadcom 5706, 5708, 5709 rev. A nics, any read beyond the From de90394b0c6b9cf6e84c8a9ecfb9673e31b88a19 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 28 Feb 2017 14:25:45 +0800 Subject: [PATCH 242/357] usb: gadget: dummy_hcd: clear usb_gadget region before registration commit 5bbc852676ae08e818241cf66a3ffe4be44225c4 upstream. When the user does device unbind and rebind test, the kernel will show below dump due to usb_gadget memory region is dirty after unbind. Clear usb_gadget region for every new probe. root@imx6qdlsolo:/sys/bus/platform/drivers/dummy_udc# echo dummy_udc.0 > bind [ 102.523312] kobject (eddd78b0): tried to init an initialized object, something is seriously wrong. [ 102.532447] CPU: 0 PID: 734 Comm: sh Not tainted 4.10.0-rc7-00872-g1b2b8e9 #1298 [ 102.539866] Hardware name: Freescale i.MX6 SoloX (Device Tree) [ 102.545717] Backtrace: [ 102.548225] [] (dump_backtrace) from [] (show_stack+0x18/0x1c) [ 102.555822] r7:ede34000 r6:60010013 r5:00000000 r4:c0f29418 [ 102.561512] [] (show_stack) from [] (dump_stack+0xb4/0xe8) [ 102.568764] [] (dump_stack) from [] (kobject_init+0x80/0x9c) [ 102.576187] r10:0000001f r9:eddd7000 r8:eeaf8c10 r7:eddd78a8 r6:c177891c r5:c0f3b060 [ 102.584036] r4:eddd78b0 r3:00000000 [ 102.587641] [] (kobject_init) from [] (device_initialize+0x28/0xf8) [ 102.595665] r5:eebc4800 r4:eddd78a8 [ 102.599268] [] (device_initialize) from [] (device_register+0x14/0x20) [ 102.607556] r7:eddd78a8 r6:00000000 r5:eebc4800 r4:eddd78a8 [ 102.613256] [] (device_register) from [] (usb_add_gadget_udc_release+0x8c/0x1ec) [ 102.622410] r5:eebc4800 r4:eddd7860 [ 102.626015] [] (usb_add_gadget_udc_release) from [] (usb_add_gadget_udc+0x14/0x18) [ 102.635351] r10:0000001f r9:eddd7000 r8:eddd788c r7:bf003770 r6:eddd77f8 r5:eddd7818 [ 102.643198] r4:eddd785c r3:eddd7b24 [ 102.646834] [] (usb_add_gadget_udc) from [] (dummy_udc_probe+0x170/0x1c4 [dummy_hcd]) [ 102.656458] [] (dummy_udc_probe [dummy_hcd]) from [] (platform_drv_probe+0x54/0xb8) [ 102.665881] r10:00000008 r9:c1778960 r8:bf004128 r7:fffffdfb r6:bf004128 r5:eeaf8c10 [ 102.673727] r4:eeaf8c10 [ 102.676293] [] (platform_drv_probe) from [] (driver_probe_device+0x264/0x474) [ 102.685186] r7:00000000 r6:00000000 r5:c1778960 r4:eeaf8c10 [ 102.690876] [] (driver_probe_device) from [] (bind_store+0xb8/0x14c) [ 102.698994] r10:eeb3bb4c r9:ede34000 r8:0000000c r7:eeaf8c44 r6:bf004128 r5:c0f3b668 [ 102.706840] r4:eeaf8c10 [ 102.709402] [] (bind_store) from [] (drv_attr_store+0x28/0x34) [ 102.716998] r9:ede34000 r8:00000000 r7:ee3863c0 r6:ee3863c0 r5:c0538c80 r4:c053970c [ 102.724776] [] (drv_attr_store) from [] (sysfs_kf_write+0x50/0x54) [ 102.732711] r5:c0538c80 r4:0000000c [ 102.736313] [] (sysfs_kf_write) from [] (kernfs_fop_write+0x100/0x214) [ 102.744599] r7:ee3863c0 r6:eeb3bb40 r5:00000000 r4:00000000 [ 102.750287] [] (kernfs_fop_write) from [] (__vfs_write+0x34/0x120) [ 102.758231] r10:00000000 r9:ede34000 r8:c0108bc4 r7:0000000c r6:ede35f80 r5:c029bd84 [ 102.766077] r4:ee223780 [ 102.768638] [] (__vfs_write) from [] (vfs_write+0xa8/0x170) [ 102.775974] r9:ede34000 r8:c0108bc4 r7:ede35f80 r6:01861cb0 r5:ee223780 r4:0000000c [ 102.783743] [] (vfs_write) from [] (SyS_write+0x4c/0xa8) [ 102.790818] r9:ede34000 r8:c0108bc4 r7:0000000c r6:01861cb0 r5:ee223780 r4:ee223780 [ 102.798595] [] (SyS_write) from [] (ret_fast_syscall+0x0/0x1c) [ 102.806188] r7:00000004 r6:b6e83d58 r5:01861cb0 r4:0000000c Fixes: 90fccb529d24 ("usb: gadget: Gadget directory cleanup - group UDC drivers") Acked-by: Alan Stern Signed-off-by: Peter Chen Tested-by: Xiaolong Ye Reported-by: Fengguang Wu Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/dummy_hcd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index a81d9ab861dc..4fa5de2eb501 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -1031,6 +1031,8 @@ static int dummy_udc_probe(struct platform_device *pdev) int rc; dum = *((void **)dev_get_platdata(&pdev->dev)); + /* Clear usb_gadget region for new registration to udc-core */ + memzero_explicit(&dum->gadget, sizeof(struct usb_gadget)); dum->gadget.name = gadget_name; dum->gadget.ops = &dummy_ops; dum->gadget.max_speed = USB_SPEED_SUPER; From 808ee146d733ff2792a26d715ac72e6e745c9a47 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Tue, 31 Jan 2017 13:24:54 +0200 Subject: [PATCH 243/357] usb: dwc3: gadget: make Set Endpoint Configuration macros safe commit 7369090a9fb57c3fc705ce355d2e4523a5a24716 upstream. Some gadget drivers are bad, bad boys. We notice that ADB was passing bad Burst Size which caused top bits of param0 to be overwritten which confused DWC3 when running this command. In order to avoid future issues, we're going to make sure values passed by macros are always safe for the controller. Note that ADB still needs a fix to *not* pass bad values. Reported-by: Mohamed Abbas Sugested-by: Adam Andruszak Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/usb/dwc3/gadget.h b/drivers/usb/dwc3/gadget.h index e4a1d974a5ae..39459b718e98 100644 --- a/drivers/usb/dwc3/gadget.h +++ b/drivers/usb/dwc3/gadget.h @@ -28,23 +28,23 @@ struct dwc3; #define gadget_to_dwc(g) (container_of(g, struct dwc3, gadget)) /* DEPCFG parameter 1 */ -#define DWC3_DEPCFG_INT_NUM(n) ((n) << 0) +#define DWC3_DEPCFG_INT_NUM(n) (((n) & 0x1f) << 0) #define DWC3_DEPCFG_XFER_COMPLETE_EN (1 << 8) #define DWC3_DEPCFG_XFER_IN_PROGRESS_EN (1 << 9) #define DWC3_DEPCFG_XFER_NOT_READY_EN (1 << 10) #define DWC3_DEPCFG_FIFO_ERROR_EN (1 << 11) #define DWC3_DEPCFG_STREAM_EVENT_EN (1 << 13) -#define DWC3_DEPCFG_BINTERVAL_M1(n) ((n) << 16) +#define DWC3_DEPCFG_BINTERVAL_M1(n) (((n) & 0xff) << 16) #define DWC3_DEPCFG_STREAM_CAPABLE (1 << 24) -#define DWC3_DEPCFG_EP_NUMBER(n) ((n) << 25) +#define DWC3_DEPCFG_EP_NUMBER(n) (((n) & 0x1f) << 25) #define DWC3_DEPCFG_BULK_BASED (1 << 30) #define DWC3_DEPCFG_FIFO_BASED (1 << 31) /* DEPCFG parameter 0 */ -#define DWC3_DEPCFG_EP_TYPE(n) ((n) << 1) -#define DWC3_DEPCFG_MAX_PACKET_SIZE(n) ((n) << 3) -#define DWC3_DEPCFG_FIFO_NUMBER(n) ((n) << 17) -#define DWC3_DEPCFG_BURST_SIZE(n) ((n) << 22) +#define DWC3_DEPCFG_EP_TYPE(n) (((n) & 0x3) << 1) +#define DWC3_DEPCFG_MAX_PACKET_SIZE(n) (((n) & 0x7ff) << 3) +#define DWC3_DEPCFG_FIFO_NUMBER(n) (((n) & 0x1f) << 17) +#define DWC3_DEPCFG_BURST_SIZE(n) (((n) & 0xf) << 22) #define DWC3_DEPCFG_DATA_SEQ_NUM(n) ((n) << 26) /* This applies for core versions earlier than 1.94a */ #define DWC3_DEPCFG_IGN_SEQ_NUM (1 << 31) From accadd8164b7a45645c8287839ca9d648415cb5e Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 15 Feb 2017 13:38:22 +0200 Subject: [PATCH 244/357] usb: dwc3-omap: Fix missing break in dwc3_omap_set_mailbox() commit 0913750f9fb6f26bcd00c8f9dd9a8d1b8d031246 upstream. We need to break from all cases if we want to treat each one of them separately. Reported-by: Gustavo A. R. Silva Fixes: d2728fb3e01f ("usb: dwc3: omap: Pass VBUS and ID events transparently") Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-omap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index 29e80cc9b634..5dd1832564c7 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -249,6 +249,7 @@ static void dwc3_omap_set_mailbox(struct dwc3_omap *omap, val = dwc3_omap_read_utmi_ctrl(omap); val |= USBOTGSS_UTMI_OTG_CTRL_IDDIG; dwc3_omap_write_utmi_ctrl(omap, val); + break; case OMAP_DWC3_VBUS_OFF: val = dwc3_omap_read_utmi_ctrl(omap); From 40192c96660f46b9aaaf2b8196856d230f859aca Mon Sep 17 00:00:00 2001 From: Jelle Martijn Kok Date: Tue, 21 Feb 2017 12:48:18 +0100 Subject: [PATCH 245/357] usb: ohci-at91: Do not drop unhandled USB suspend control requests commit 85550f9148a852ed363a386577ad31b97b95dfb8 upstream. In patch 2e2aa1bc7eff90ecm, USB suspend and wakeup control requests are passed to SFR_OHCIICR register. If a processor does not have such a register, this hub control request will be dropped. If no such a SFR register is available, all USB suspend control requests will now be processed using ohci_hub_control() (like before patch 2e2aa1bc7eff90ecm.) Tested on an Atmel AT91SAM9G20 with an on-board TI TUSB2046B hub chip If the last USB device is unplugged from the USB hub, the hub goes into sleep and will not wakeup when an USB devices is inserted. Fixes: 2e2aa1bc7eff90ec ("usb: ohci-at91: Forcibly suspend ports while USB suspend") Signed-off-by: Jelle Martijn Kok Tested-by: Wenyou Yang Cc: Wenyou Yang Cc: Alan Stern Acked-by: Nicolas Ferre Reviewed-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-at91.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c index b38a228134df..af0566da77e7 100644 --- a/drivers/usb/host/ohci-at91.c +++ b/drivers/usb/host/ohci-at91.c @@ -361,7 +361,7 @@ static int ohci_at91_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, case USB_PORT_FEAT_SUSPEND: dev_dbg(hcd->self.controller, "SetPortFeat: SUSPEND\n"); - if (valid_port(wIndex)) { + if (valid_port(wIndex) && ohci_at91->sfr_regmap) { ohci_at91_port_suspend(ohci_at91->sfr_regmap, 1); return 0; @@ -404,7 +404,7 @@ static int ohci_at91_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, case USB_PORT_FEAT_SUSPEND: dev_dbg(hcd->self.controller, "ClearPortFeature: SUSPEND\n"); - if (valid_port(wIndex)) { + if (valid_port(wIndex) && ohci_at91->sfr_regmap) { ohci_at91_port_suspend(ohci_at91->sfr_regmap, 0); return 0; From 5ce2e4ce4a8a02d1ca9ed0d4aeac7a24cd3a59b9 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Tue, 31 Jan 2017 14:54:45 +0200 Subject: [PATCH 246/357] usb: gadget: function: f_fs: pass companion descriptor along commit 2bfa0719ac2a9b2f3c91345873d3cdebd0296ba9 upstream. If we're dealing with SuperSpeed endpoints, we need to make sure to pass along the companion descriptor and initialize fields needed by the Gadget API. Eventually, f_fs.c should be converted to use config_ep_by_speed() like all other functions, though. Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 8d412d8b1f29..89081b834615 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1833,11 +1833,14 @@ static int ffs_func_eps_enable(struct ffs_function *func) spin_lock_irqsave(&func->ffs->eps_lock, flags); do { struct usb_endpoint_descriptor *ds; + struct usb_ss_ep_comp_descriptor *comp_desc = NULL; + int needs_comp_desc = false; int desc_idx; - if (ffs->gadget->speed == USB_SPEED_SUPER) + if (ffs->gadget->speed == USB_SPEED_SUPER) { desc_idx = 2; - else if (ffs->gadget->speed == USB_SPEED_HIGH) + needs_comp_desc = true; + } else if (ffs->gadget->speed == USB_SPEED_HIGH) desc_idx = 1; else desc_idx = 0; @@ -1854,6 +1857,14 @@ static int ffs_func_eps_enable(struct ffs_function *func) ep->ep->driver_data = ep; ep->ep->desc = ds; + + comp_desc = (struct usb_ss_ep_comp_descriptor *)(ds + + USB_DT_ENDPOINT_SIZE); + ep->ep->maxburst = comp_desc->bMaxBurst + 1; + + if (needs_comp_desc) + ep->ep->comp_desc = comp_desc; + ret = usb_ep_enable(ep->ep); if (likely(!ret)) { epfile->ep = ep; From 03123df08e6b77c0491af64e46054dbef5a1cea4 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Thu, 2 Mar 2017 10:44:58 +0200 Subject: [PATCH 247/357] Revert "usb: gadget: uvc: Add missing call for additional setup data" commit eb38d913c27f32f4df173791051fecf6aca34173 upstream. This reverts commit 4fbac5206afd01b717d4bdc58793d471f3391b4b. This commit breaks g_webcam when used with uvc-gadget [1]. The user space application (e.g. uvc-gadget) is responsible for sending response to UVC class specific requests on control endpoint in uvc_send_response() in uvc_v4l2.c. The bad commit was causing a duplicate response to be sent with incorrect response data thus causing UVC probe to fail at the host and broken control transfer endpoint at the gadget. [1] - git://git.ideasonboard.org/uvc-gadget.git Acked-by: Laurent Pinchart Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_uvc.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c index 27ed51b5082f..29b41b5dee04 100644 --- a/drivers/usb/gadget/function/f_uvc.c +++ b/drivers/usb/gadget/function/f_uvc.c @@ -258,13 +258,6 @@ uvc_function_setup(struct usb_function *f, const struct usb_ctrlrequest *ctrl) memcpy(&uvc_event->req, ctrl, sizeof(uvc_event->req)); v4l2_event_queue(&uvc->vdev, &v4l2_event); - /* Pass additional setup data to userspace */ - if (uvc->event_setup_out && uvc->event_length) { - uvc->control_req->length = uvc->event_length; - return usb_ep_queue(uvc->func.config->cdev->gadget->ep0, - uvc->control_req, GFP_ATOMIC); - } - return 0; } From d863767907527f0d43ea856a78c060d1f3599ac9 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Thu, 9 Mar 2017 15:39:36 +0200 Subject: [PATCH 248/357] usb: host: xhci-dbg: HCIVERSION should be a binary number commit f95e60a7dbecd2de816bb3ad517b3d4fbc20b507 upstream. According to xHCI spec, HCIVERSION containing a BCD encoding of the xHCI specification revision number, 0100h corresponds to xHCI version 1.0. Change "100" as "0x100". Cc: Lu Baolu Fixes: 04abb6de2825 ("xhci: Read and parse new xhci 1.1 capability register") Signed-off-by: Peter Chen Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-dbg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-dbg.c b/drivers/usb/host/xhci-dbg.c index 74c42f722678..3425154baf8b 100644 --- a/drivers/usb/host/xhci-dbg.c +++ b/drivers/usb/host/xhci-dbg.c @@ -111,7 +111,7 @@ static void xhci_print_cap_regs(struct xhci_hcd *xhci) xhci_dbg(xhci, "RTSOFF 0x%x:\n", temp & RTSOFF_MASK); /* xhci 1.1 controllers have the HCCPARAMS2 register */ - if (hci_version > 100) { + if (hci_version > 0x100) { temp = readl(&xhci->cap_regs->hcc_params2); xhci_dbg(xhci, "HCC PARAMS2 0x%x:\n", (unsigned int) temp); xhci_dbg(xhci, " HC %s Force save context capability", From 24db1c5a1c52c74b0218847c0a4cb1b40678cf84 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 9 Mar 2017 15:39:37 +0200 Subject: [PATCH 249/357] usb: host: xhci-plat: Fix timeout on removal of hot pluggable xhci controllers commit dcc7620cad5ad1326a78f4031a7bf4f0e5b42984 upstream. Upstream commit 98d74f9ceaef ("xhci: fix 10 second timeout on removal of PCI hotpluggable xhci controllers") fixes a problem with hot pluggable PCI xhci controllers which can result in excessive timeouts, to the point where the system reports a deadlock. The same problem is seen with hot pluggable xhci controllers using the xhci-plat driver, such as the driver used for Type-C ports on rk3399. Similar to hot-pluggable PCI controllers, the driver for this chip removes the xhci controller from the system when the Type-C cable is disconnected. The solution for PCI devices works just as well for non-PCI devices and avoids the problem. Signed-off-by: Guenter Roeck Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index abe360684f0b..5895e84f9dcc 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -274,6 +274,8 @@ static int xhci_plat_remove(struct platform_device *dev) struct xhci_hcd *xhci = hcd_to_xhci(hcd); struct clk *clk = xhci->clk; + xhci->xhc_state |= XHCI_STATE_REMOVING; + usb_remove_hcd(xhci->shared_hcd); usb_phy_shutdown(hcd->usb_phy); From 0ab90ddba78a3fb34c34971ec36f2d51ba7827ec Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2017 17:36:41 +0100 Subject: [PATCH 250/357] USB: serial: safe_serial: fix information leak in completion handler commit 8c76d7cd520ebffc1ea9ea0850d87a224a50c7f2 upstream. Add missing sanity check to the bulk-in completion handler to avoid an integer underflow that could be triggered by a malicious device. This avoids leaking up to 56 bytes from after the URB transfer buffer to user space. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/safe_serial.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/serial/safe_serial.c b/drivers/usb/serial/safe_serial.c index 93c6c9b08daa..8a069aa154ed 100644 --- a/drivers/usb/serial/safe_serial.c +++ b/drivers/usb/serial/safe_serial.c @@ -200,6 +200,11 @@ static void safe_process_read_urb(struct urb *urb) if (!safe) goto out; + if (length < 2) { + dev_err(&port->dev, "malformed packet\n"); + return; + } + fcs = fcs_compute10(data, length, CRC10_INITFCS); if (fcs) { dev_err(&port->dev, "%s - bad CRC %x\n", __func__, fcs); From 6d6c5895f45431579c20f4183b25183f0e3afc92 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2017 17:36:38 +0100 Subject: [PATCH 251/357] USB: serial: omninet: fix reference leaks at open commit 30572418b445d85fcfe6c8fe84c947d2606767d8 upstream. This driver needlessly took another reference to the tty on open, a reference which was then never released on close. This lead to not just a leak of the tty, but also a driver reference leak that prevented the driver from being unloaded after a port had once been opened. Fixes: 4a90f09b20f4 ("tty: usb-serial krefs") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/omninet.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c index a180b17d2432..76564b3bebb9 100644 --- a/drivers/usb/serial/omninet.c +++ b/drivers/usb/serial/omninet.c @@ -142,12 +142,6 @@ static int omninet_port_remove(struct usb_serial_port *port) static int omninet_open(struct tty_struct *tty, struct usb_serial_port *port) { - struct usb_serial *serial = port->serial; - struct usb_serial_port *wport; - - wport = serial->port[1]; - tty_port_tty_set(&wport->port, tty); - return usb_serial_generic_open(tty, port); } From 653418adaf1026a10e0c2e4e29b7319610117b33 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 7 Mar 2017 16:11:03 +0100 Subject: [PATCH 252/357] USB: iowarrior: fix NULL-deref at probe commit b7321e81fc369abe353cf094d4f0dc2fe11ab95f upstream. Make sure to check for the required interrupt-in endpoint to avoid dereferencing a NULL-pointer should a malicious device lack such an endpoint. Note that a fairly recent change purported to fix this issue, but added an insufficient test on the number of endpoints only, a test which can now be removed. Fixes: 4ec0ef3a8212 ("USB: iowarrior: fix oops with malicious USB descriptors") Fixes: 946b960d13c1 ("USB: add driver for iowarrior devices.") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 095778ff984d..3ad058cbe6ca 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -781,12 +781,6 @@ static int iowarrior_probe(struct usb_interface *interface, iface_desc = interface->cur_altsetting; dev->product_id = le16_to_cpu(udev->descriptor.idProduct); - if (iface_desc->desc.bNumEndpoints < 1) { - dev_err(&interface->dev, "Invalid number of endpoints\n"); - retval = -EINVAL; - goto error; - } - /* set up the endpoint information */ for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { endpoint = &iface_desc->endpoint[i].desc; @@ -797,6 +791,13 @@ static int iowarrior_probe(struct usb_interface *interface, /* this one will match for the IOWarrior56 only */ dev->int_out_endpoint = endpoint; } + + if (!dev->int_in_endpoint) { + dev_err(&interface->dev, "no interrupt-in endpoint found\n"); + retval = -ENODEV; + goto error; + } + /* we have to check the report_size often, so remember it in the endianness suitable for our machine */ dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint); if ((dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) && From f06b17020a848721f157524c7aed1cea43ea5131 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 7 Mar 2017 16:11:04 +0100 Subject: [PATCH 253/357] USB: iowarrior: fix NULL-deref in write commit de46e56653de7b3b54baa625bd582635008b8d05 upstream. Make sure to verify that we have the required interrupt-out endpoint for IOWarrior56 devices to avoid dereferencing a NULL-pointer in write should a malicious device lack such an endpoint. Fixes: 946b960d13c1 ("USB: add driver for iowarrior devices.") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 3ad058cbe6ca..37c63cb39714 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -798,6 +798,14 @@ static int iowarrior_probe(struct usb_interface *interface, goto error; } + if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) { + if (!dev->int_out_endpoint) { + dev_err(&interface->dev, "no interrupt-out endpoint found\n"); + retval = -ENODEV; + goto error; + } + } + /* we have to check the report_size often, so remember it in the endianness suitable for our machine */ dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint); if ((dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) && From 449b0bb23708ad5e1c0c214632dead8fc5273098 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2017 17:36:37 +0100 Subject: [PATCH 254/357] USB: serial: io_ti: fix NULL-deref in interrupt callback commit 0b1d250afb8eb9d65afb568bac9b9f9253a82b49 upstream. Fix a NULL-pointer dereference in the interrupt callback should a malicious device send data containing a bad port number by adding the missing sanity check. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/io_ti.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index c02808a30436..67d68b502a51 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -1674,6 +1674,12 @@ static void edge_interrupt_callback(struct urb *urb) function = TIUMP_GET_FUNC_FROM_CODE(data[0]); dev_dbg(dev, "%s - port_number %d, function %d, info 0x%x\n", __func__, port_number, function, data[1]); + + if (port_number >= edge_serial->serial->num_ports) { + dev_err(dev, "bad port number %d\n", port_number); + goto exit; + } + port = edge_serial->serial->port[port_number]; edge_port = usb_get_serial_port_data(port); if (!edge_port) { From d0ef6ecee85e17742d8bce1559872cb542d6ccac Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2017 17:36:40 +0100 Subject: [PATCH 255/357] USB: serial: io_ti: fix information leak in completion handler commit 654b404f2a222f918af9b0cd18ad469d0c941a8e upstream. Add missing sanity check to the bulk-in completion handler to avoid an integer underflow that can be triggered by a malicious device. This avoids leaking 128 kB of memory content from after the URB transfer buffer to user space. Fixes: 8c209e6782ca ("USB: make actual_length in struct urb field u32") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/io_ti.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index 67d68b502a51..f1a8fdcd8674 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -1761,7 +1761,7 @@ static void edge_bulk_in_callback(struct urb *urb) port_number = edge_port->port->port_number; - if (edge_port->lsr_event) { + if (urb->actual_length > 0 && edge_port->lsr_event) { edge_port->lsr_event = 0; dev_dbg(dev, "%s ===== Port %u LSR Status = %02x, Data = %02x ======\n", __func__, port_number, edge_port->lsr_mask, *data); From ce8ab5f168f654101ecba3f4443b12d4afdee841 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 25 Feb 2017 18:36:44 +0200 Subject: [PATCH 256/357] serial: samsung: Continue to work if DMA request fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f98c7bce570bdbe344b74ff5daa7dfeef3f22929 upstream. If DMA is not available (even when configured in DeviceTree), the driver will fail the startup procedure thus making serial console not available. For example this causes boot failure on QEMU ARMv7 (Exynos4210, SMDKC210): [    1.302575] OF: amba_device_add() failed (-19) for /amba/pdma@12680000 ... [   11.435732] samsung-uart 13800000.serial: DMA request failed [   72.963893] samsung-uart 13800000.serial: DMA request failed [   73.143361] Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000000 DMA is not necessary for serial to work, so continue with UART startup after emitting a warning. Fixes: 62c37eedb74c ("serial: samsung: add dma reqest/release functions") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/samsung.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c index f44615fa474d..3e2ef4fd7382 100644 --- a/drivers/tty/serial/samsung.c +++ b/drivers/tty/serial/samsung.c @@ -1036,8 +1036,10 @@ static int s3c64xx_serial_startup(struct uart_port *port) if (ourport->dma) { ret = s3c24xx_serial_request_dma(ourport); if (ret < 0) { - dev_warn(port->dev, "DMA request failed\n"); - return ret; + dev_warn(port->dev, + "DMA request failed, DMA will not be used\n"); + devm_kfree(port->dev, ourport->dma); + ourport->dma = NULL; } } From 9d89c20f3b8f82ddb1d7ef63748ad74691549e80 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Thu, 2 Mar 2017 15:23:42 +0100 Subject: [PATCH 257/357] KVM: s390: Fix guest migration for huge guests resulting in panic commit 2e4d88009f57057df7672fa69a32b5224af54d37 upstream. While we can technically not run huge page guests right now, we can setup a guest with huge pages. Trying to migrate it will trigger a VM_BUG_ON and, if the kernel is not configured to panic on a BUG, it will happily try to work on non-existing page table entries. With this patch, we always return "dirty" if we encounter a large page when migrating. This at least fixes the immediate problem until we have proper handling for both kind of pages. Fixes: 15f36eb ("KVM: s390: Add proper dirty bitmap support to S390 kvm.") Signed-off-by: Janosch Frank Acked-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/mm/pgtable.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index d56ef26d4681..7678f7956409 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -606,12 +606,29 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) { spinlock_t *ptl; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; pgste_t pgste; pte_t *ptep; pte_t pte; bool dirty; - ptep = get_locked_pte(mm, addr, &ptl); + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (!pud) + return false; + pmd = pmd_alloc(mm, pud, addr); + if (!pmd) + return false; + /* We can't run guests backed by huge pages, but userspace can + * still set them up and then try to migrate them without any + * migration support. + */ + if (pmd_large(*pmd)) + return true; + + ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl); if (unlikely(!ptep)) return false; From d29e6215e5ab9a6b64b5cee57598ae8ec05666a1 Mon Sep 17 00:00:00 2001 From: Jintack Lim Date: Mon, 6 Mar 2017 05:42:37 -0800 Subject: [PATCH 258/357] KVM: arm/arm64: Let vcpu thread modify its own active state commit 370a0ec1819990f8e2a93df7cc9c0146980ed45f upstream. Currently, if a vcpu thread tries to change the active state of an interrupt which is already on the same vcpu's AP list, it will loop forever. Since the VGIC mmio handler is called after a vcpu has already synced back the LR state to the struct vgic_irq, we can just let it proceed safely. Reviewed-by: Marc Zyngier Signed-off-by: Jintack Lim Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/vgic/vgic-mmio.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index ebe1b9fa3c4d..85814d1bad11 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -187,21 +187,37 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool new_active_state) { + struct kvm_vcpu *requester_vcpu; spin_lock(&irq->irq_lock); + + /* + * The vcpu parameter here can mean multiple things depending on how + * this function is called; when handling a trap from the kernel it + * depends on the GIC version, and these functions are also called as + * part of save/restore from userspace. + * + * Therefore, we have to figure out the requester in a reliable way. + * + * When accessing VGIC state from user space, the requester_vcpu is + * NULL, which is fine, because we guarantee that no VCPUs are running + * when accessing VGIC state from user space so irq->vcpu->cpu is + * always -1. + */ + requester_vcpu = kvm_arm_get_running_vcpu(); + /* * If this virtual IRQ was written into a list register, we * have to make sure the CPU that runs the VCPU thread has - * synced back LR state to the struct vgic_irq. We can only - * know this for sure, when either this irq is not assigned to - * anyone's AP list anymore, or the VCPU thread is not - * running on any CPUs. + * synced back the LR state to the struct vgic_irq. * - * In the opposite case, we know the VCPU thread may be on its - * way back from the guest and still has to sync back this - * IRQ, so we release and re-acquire the spin_lock to let the - * other thread sync back the IRQ. + * As long as the conditions below are true, we know the VCPU thread + * may be on its way back from the guest (we kicked the VCPU thread in + * vgic_change_active_prepare) and still has to sync back this IRQ, + * so we release and re-acquire the spin_lock to let the other thread + * sync back the IRQ. */ while (irq->vcpu && /* IRQ may have state in an LR somewhere */ + irq->vcpu != requester_vcpu && /* Current thread is not the VCPU thread */ irq->vcpu->cpu != -1) /* VCPU thread is running */ cond_resched_lock(&irq->irq_lock); From 21582cd0b64ca464084cb872b3d348065b0b54af Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 15 Feb 2017 11:26:10 -0500 Subject: [PATCH 259/357] dm: flush queued bios when process blocks to avoid deadlock commit d67a5f4b5947aba4bfe9a80a2b86079c215ca755 upstream. Commit df2cb6daa4 ("block: Avoid deadlocks with bio allocation by stacking drivers") created a workqueue for every bio set and code in bio_alloc_bioset() that tries to resolve some low-memory deadlocks by redirecting bios queued on current->bio_list to the workqueue if the system is low on memory. However other deadlocks (see below **) may happen, without any low memory condition, because generic_make_request is queuing bios to current->bio_list (rather than submitting them). ** the related dm-snapshot deadlock is detailed here: https://www.redhat.com/archives/dm-devel/2016-July/msg00065.html Fix this deadlock by redirecting any bios on current->bio_list to the bio_set's rescue workqueue on every schedule() call. Consequently, when the process blocks on a mutex, the bios queued on current->bio_list are dispatched to independent workqueus and they can complete without waiting for the mutex to be available. The structure blk_plug contains an entry cb_list and this list can contain arbitrary callback functions that are called when the process blocks. To implement this fix DM (ab)uses the onstack plug's cb_list interface to get its flush_current_bio_list() called at schedule() time. This fixes the snapshot deadlock - if the map method blocks, flush_current_bio_list() will be called and it redirects bios waiting on current->bio_list to appropriate workqueues. Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1267650 Depends-on: df2cb6daa4 ("block: Avoid deadlocks with bio allocation by stacking drivers") Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ef7bf1dd6900..628ba001bb3c 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -972,10 +972,61 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors) } EXPORT_SYMBOL_GPL(dm_accept_partial_bio); +/* + * Flush current->bio_list when the target map method blocks. + * This fixes deadlocks in snapshot and possibly in other targets. + */ +struct dm_offload { + struct blk_plug plug; + struct blk_plug_cb cb; +}; + +static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule) +{ + struct dm_offload *o = container_of(cb, struct dm_offload, cb); + struct bio_list list; + struct bio *bio; + + INIT_LIST_HEAD(&o->cb.list); + + if (unlikely(!current->bio_list)) + return; + + list = *current->bio_list; + bio_list_init(current->bio_list); + + while ((bio = bio_list_pop(&list))) { + struct bio_set *bs = bio->bi_pool; + if (unlikely(!bs) || bs == fs_bio_set) { + bio_list_add(current->bio_list, bio); + continue; + } + + spin_lock(&bs->rescue_lock); + bio_list_add(&bs->rescue_list, bio); + queue_work(bs->rescue_workqueue, &bs->rescue_work); + spin_unlock(&bs->rescue_lock); + } +} + +static void dm_offload_start(struct dm_offload *o) +{ + blk_start_plug(&o->plug); + o->cb.callback = flush_current_bio_list; + list_add(&o->cb.list, ¤t->plug->cb_list); +} + +static void dm_offload_end(struct dm_offload *o) +{ + list_del(&o->cb.list); + blk_finish_plug(&o->plug); +} + static void __map_bio(struct dm_target_io *tio) { int r; sector_t sector; + struct dm_offload o; struct bio *clone = &tio->clone; struct dm_target *ti = tio->ti; @@ -988,7 +1039,11 @@ static void __map_bio(struct dm_target_io *tio) */ atomic_inc(&tio->io->io_count); sector = clone->bi_iter.bi_sector; + + dm_offload_start(&o); r = ti->type->map(ti, clone); + dm_offload_end(&o); + if (r == DM_MAPIO_REMAPPED) { /* the bio has been remapped so dispatch it */ From 39df5977fd410a8ebd7f736455531440327e9ff0 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Wed, 22 Feb 2017 18:48:01 -0300 Subject: [PATCH 260/357] rc: raw decoder for keymap protocol is not loaded on register commit 413808685dd7c9b54bbc5af79da2eaddd0fc3cb2 upstream. When the protocol is set via the sysfs protocols attribute, the decoder is loaded. However, when it is not when a device is first plugged in or registered. Fixes: acc1c3c ("[media] media: rc: load decoder modules on-demand") Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/rc/rc-main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c index d9c1f2ff7119..aba77357ab23 100644 --- a/drivers/media/rc/rc-main.c +++ b/drivers/media/rc/rc-main.c @@ -1411,6 +1411,7 @@ int rc_register_device(struct rc_dev *dev) int attr = 0; int minor; int rc; + u64 rc_type; if (!dev || !dev->map_name) return -EINVAL; @@ -1496,14 +1497,18 @@ int rc_register_device(struct rc_dev *dev) goto out_input; } + rc_type = BIT_ULL(rc_map->rc_type); + if (dev->change_protocol) { - u64 rc_type = (1ll << rc_map->rc_type); rc = dev->change_protocol(dev, &rc_type); if (rc < 0) goto out_raw; dev->enabled_protocols = rc_type; } + if (dev->driver_type == RC_DRIVER_IR_RAW) + ir_raw_load_modules(&rc_type); + /* Allow the RC sysfs nodes to be accessible */ atomic_set(&dev->initialized, 1); From 8fed8fc18839d7aec7ea046d79eb814dce6255c3 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 14 Feb 2017 11:31:15 -0500 Subject: [PATCH 261/357] ext4: don't BUG when truncating encrypted inodes on the orphan list commit 0d06863f903ac5f4f6efb0273079d27de3e53a28 upstream. Fix a BUG when the kernel tries to mount a file system constructed as follows: echo foo > foo.txt mke2fs -Fq -t ext4 -O encrypt foo.img 100 debugfs -w foo.img << EOF write foo.txt a set_inode_field a i_flags 0x80800 set_super_value s_last_orphan 12 quit EOF root@kvm-xfstests:~# mount -o loop foo.img /mnt [ 160.238770] ------------[ cut here ]------------ [ 160.240106] kernel BUG at /usr/projects/linux/ext4/fs/ext4/inode.c:3874! [ 160.240106] invalid opcode: 0000 [#1] SMP [ 160.240106] Modules linked in: [ 160.240106] CPU: 0 PID: 2547 Comm: mount Tainted: G W 4.10.0-rc3-00034-gcdd33b941b67 #227 [ 160.240106] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-1 04/01/2014 [ 160.240106] task: f4518000 task.stack: f47b6000 [ 160.240106] EIP: ext4_block_zero_page_range+0x1a7/0x2b4 [ 160.240106] EFLAGS: 00010246 CPU: 0 [ 160.240106] EAX: 00000001 EBX: f7be4b50 ECX: f47b7dc0 EDX: 00000007 [ 160.240106] ESI: f43b05a8 EDI: f43babec EBP: f47b7dd0 ESP: f47b7dac [ 160.240106] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 [ 160.240106] CR0: 80050033 CR2: bfd85b08 CR3: 34a00680 CR4: 000006f0 [ 160.240106] Call Trace: [ 160.240106] ext4_truncate+0x1e9/0x3e5 [ 160.240106] ext4_fill_super+0x286f/0x2b1e [ 160.240106] ? set_blocksize+0x2e/0x7e [ 160.240106] mount_bdev+0x114/0x15f [ 160.240106] ext4_mount+0x15/0x17 [ 160.240106] ? ext4_calculate_overhead+0x39d/0x39d [ 160.240106] mount_fs+0x58/0x115 [ 160.240106] vfs_kern_mount+0x4b/0xae [ 160.240106] do_mount+0x671/0x8c3 [ 160.240106] ? _copy_from_user+0x70/0x83 [ 160.240106] ? strndup_user+0x31/0x46 [ 160.240106] SyS_mount+0x57/0x7b [ 160.240106] do_int80_syscall_32+0x4f/0x61 [ 160.240106] entry_INT80_32+0x2f/0x2f [ 160.240106] EIP: 0xb76b919e [ 160.240106] EFLAGS: 00000246 CPU: 0 [ 160.240106] EAX: ffffffda EBX: 08053838 ECX: 08052188 EDX: 080537e8 [ 160.240106] ESI: c0ed0000 EDI: 00000000 EBP: 080537e8 ESP: bfa13660 [ 160.240106] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b [ 160.240106] Code: 59 8b 00 a8 01 0f 84 09 01 00 00 8b 07 66 25 00 f0 66 3d 00 80 75 61 89 f8 e8 3e e2 ff ff 84 c0 74 56 83 bf 48 02 00 00 00 75 02 <0f> 0b 81 7d e8 00 10 00 00 74 02 0f 0b 8b 43 04 8b 53 08 31 c9 [ 160.240106] EIP: ext4_block_zero_page_range+0x1a7/0x2b4 SS:ESP: 0068:f47b7dac [ 160.317241] ---[ end trace d6a773a375c810a5 ]--- The problem is that when the kernel tries to truncate an inode in ext4_truncate(), it tries to clear any on-disk data beyond i_size. Without the encryption key, it can't do that, and so it triggers a BUG. E2fsck does *not* provide this service, and in practice most file systems have their orphan list processed by e2fsck, so to avoid crashing, this patch skips this step if we don't have access to the encryption key (which is the case when processing the orphan list; in all other cases, we will have the encryption key, or the kernel wouldn't have allowed the file to be opened). An open question is whether the fact that e2fsck isn't clearing the bytes beyond i_size causing problems --- and if we've lived with it not doing it for so long, can we drop this from the kernel replay of the orphan list in all cases (not just when we don't have the key for encrypted inodes). Addresses-Google-Bug: #35209576 Signed-off-by: Theodore Ts'o Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 1d4f5faa04b5..dc9d64ac5969 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3824,6 +3824,10 @@ static int ext4_block_truncate_page(handle_t *handle, unsigned blocksize; struct inode *inode = mapping->host; + /* If we are processing an encrypted inode during orphan list handling */ + if (ext4_encrypted_inode(inode) && !fscrypt_has_encryption_key(inode)) + return 0; + blocksize = inode->i_sb->s_blocksize; length = blocksize - (offset & (blocksize - 1)); From c8186699527b5b0ee080e510afac9a3db797aa1b Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 18 Jan 2017 14:10:32 +0200 Subject: [PATCH 262/357] IB/mlx5: Verify that Q counters are supported commit 45bded2c216da6010184ac5ebe88c27f73439009 upstream. Make sure that the Q counters are supported by the FW before trying to allocate/deallocte them, this will avoid driver load failure when they aren't supported by the FW. Fixes: 0837e86a7a34 ('IB/mlx5: Add per port counters') Signed-off-by: Kamal Heib Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/main.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 4cab29ea394c..11bfa27b022c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3141,9 +3141,11 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (err) goto err_rsrc; - err = mlx5_ib_alloc_q_counters(dev); - if (err) - goto err_odp; + if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) { + err = mlx5_ib_alloc_q_counters(dev); + if (err) + goto err_odp; + } err = ib_register_device(&dev->ib_dev, NULL); if (err) @@ -3171,7 +3173,8 @@ err_dev: ib_unregister_device(&dev->ib_dev); err_q_cnt: - mlx5_ib_dealloc_q_counters(dev); + if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) + mlx5_ib_dealloc_q_counters(dev); err_odp: mlx5_ib_odp_remove_one(dev); @@ -3201,7 +3204,8 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) mlx5_remove_roce_notifier(dev); ib_unregister_device(&dev->ib_dev); - mlx5_ib_dealloc_q_counters(dev); + if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) + mlx5_ib_dealloc_q_counters(dev); destroy_umrc_res(dev); mlx5_ib_odp_remove_one(dev); destroy_dev_resources(&dev->devr); From 8a16224b405492bb0ebf450d9f3761580ebe1761 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 18 Mar 2017 19:15:30 +0800 Subject: [PATCH 263/357] Linux 4.9.16 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 03df4fcacdf2..4e0f962eb434 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 15 +SUBLEVEL = 16 EXTRAVERSION = NAME = Roaring Lionus From 9e354abf9ce35fbce434b99f0f4ab292152049af Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 22 Feb 2017 17:20:12 +0200 Subject: [PATCH 264/357] net/mlx5e: Register/unregister vport representors on interface attach/detach [ Upstream commit 6f08a22c5fb2b9aefb8ecd8496758e7a677c1fde ] Currently vport representors are added only on driver load and removed on driver unload. Apparently we forgot to handle them when we added the seamless reset flow feature. This caused to leave the representors netdevs alive and active with open HW resources on pci shutdown and on error reset flows. To overcome this we move their handling to interface attach/detach, so they would be cleaned up on shutdown and recreated on reset flows. Fixes: 26e59d8077a3 ("net/mlx5e: Implement mlx5e interface attach/detach callbacks") Signed-off-by: Saeed Mahameed Reviewed-by: Hadar Hen Zion Reviewed-by: Roi Dayan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b30671376a3d..8657b3be20d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3936,6 +3936,19 @@ static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev) } } +static void mlx5e_unregister_vport_rep(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(mdev); + int vport; + + if (!MLX5_CAP_GEN(mdev, vport_group_manager)) + return; + + for (vport = 1; vport < total_vfs; vport++) + mlx5_eswitch_unregister_vport_rep(esw, vport); +} + void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -3983,6 +3996,7 @@ static int mlx5e_attach(struct mlx5_core_dev *mdev, void *vpriv) return err; } + mlx5e_register_vport_rep(mdev); return 0; } @@ -3994,6 +4008,7 @@ static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv) if (!netif_device_present(netdev)) return; + mlx5e_unregister_vport_rep(mdev); mlx5e_detach_netdev(mdev, netdev); mlx5e_destroy_mdev_resources(mdev); } @@ -4012,8 +4027,6 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) if (err) return NULL; - mlx5e_register_vport_rep(mdev); - if (MLX5_CAP_GEN(mdev, vport_group_manager)) ppriv = &esw->offloads.vport_reps[0]; @@ -4065,13 +4078,7 @@ void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv) static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) { - struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); struct mlx5e_priv *priv = vpriv; - int vport; - - for (vport = 1; vport < total_vfs; vport++) - mlx5_eswitch_unregister_vport_rep(esw, vport); unregister_netdev(priv->netdev); mlx5e_detach(mdev, vpriv); From a63326646995ffeacea6517c89cd178abb2c2af8 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 22 Feb 2017 17:20:13 +0200 Subject: [PATCH 265/357] net/mlx5e: Do not reduce LRO WQE size when not using build_skb [ Upstream commit 4078e637c12f1e0a74293f1ec9563f42bff14a03 ] When rq_type is Striding RQ, no room of SKB_RESERVE is needed as SKB allocation is not done via build_skb. Fixes: e4b85508072b ("net/mlx5e: Slightly reduce hardware LRO size") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 8657b3be20d8..d4fa851ced2a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -81,6 +81,7 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) { priv->params.rq_wq_type = rq_type; + priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; switch (priv->params.rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; @@ -92,6 +93,10 @@ static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + + /* Extra room needed for build_skb */ + priv->params.lro_wqe_sz -= MLX5_RX_HEADROOM + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); } priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, BIT(priv->params.log_rq_size)); @@ -3473,12 +3478,6 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, profile->max_nch(mdev)); - priv->params.lro_wqe_sz = - MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - - /* Extra room needed for build_skb */ - MLX5_RX_HEADROOM - - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - /* Initialize pflags */ MLX5E_SET_PRIV_FLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER, priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE); From 0c6e38e791e94ae0b2a773ea7262688719072316 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 22 Feb 2017 17:20:16 +0200 Subject: [PATCH 266/357] net/mlx5e: Fix wrong CQE decompression [ Upstream commit 36154be40a28e4afaa0416da2681d80b7e2ca319 ] In cqe compression with striding RQ, the decompression of the CQE field wqe_counter was done with a wrong wraparound value. This caused handling cqes with a wrong pointer to wqe (rx descriptor) and creating SKBs with wrong data, pointing to wrong (and already consumed) strides/pages. The meaning of the CQE field wqe_counter in striding RQ holds the stride index instead of the WQE index. Hence, when decompressing a CQE, wqe_counter should have wrapped-around the number of strides in a single multi-packet WQE. We dropped this wrap-around mask at all in CQE decompression of striding RQ. It is not needed as in such cases the CQE compression session would break because of different value of wqe_id field, starting a new compression session. Tested: ethtool -K ethxx lro off/on ethtool --set-priv-flags ethxx rx_cqe_compress on super_netperf 16 {ipv4,ipv6} -t TCP_STREAM -m 50 -D verified no csum errors and no page refcount issues. Fixes: 7219ab34f184 ("net/mlx5e: CQE compression") Signed-off-by: Tariq Toukan Reported-by: Tom Herbert Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index e7b2158bb48a..796bdf06122c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -92,19 +92,18 @@ static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n) static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq, struct mlx5e_cq *cq, u32 cqcc) { - u16 wqe_cnt_step; - cq->title.byte_cnt = cq->mini_arr[cq->mini_arr_idx].byte_cnt; cq->title.check_sum = cq->mini_arr[cq->mini_arr_idx].checksum; cq->title.op_own &= 0xf0; cq->title.op_own |= 0x01 & (cqcc >> cq->wq.log_sz); cq->title.wqe_counter = cpu_to_be16(cq->decmprs_wqe_counter); - wqe_cnt_step = - rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? - mpwrq_get_cqe_consumed_strides(&cq->title) : 1; - cq->decmprs_wqe_counter = - (cq->decmprs_wqe_counter + wqe_cnt_step) & rq->wq.sz_m1; + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + cq->decmprs_wqe_counter += + mpwrq_get_cqe_consumed_strides(&cq->title); + else + cq->decmprs_wqe_counter = + (cq->decmprs_wqe_counter + 1) & rq->wq.sz_m1; } static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq, From ee2da79de289d062c9c3a5714b3f4d37fa6c868f Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Thu, 23 Feb 2017 17:19:41 +0100 Subject: [PATCH 267/357] vxlan: correctly validate VXLAN ID against VXLAN_N_VID [ Upstream commit 4e37d6911f36545b286d15073f6f2222f840e81c ] The incorrect check caused an off-by-one error: the maximum VID 0xffffff was unusable. Fixes: d342894c5d2f ("vxlan: virtual extensible lan") Signed-off-by: Matthias Schiffer Acked-by: Jiri Benc Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vxlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index d4f495b41bd4..7e3b2e27b244 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2637,7 +2637,7 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) if (data[IFLA_VXLAN_ID]) { __u32 id = nla_get_u32(data[IFLA_VXLAN_ID]); - if (id >= VXLAN_VID_MASK) + if (id >= VXLAN_N_VID) return -ERANGE; } From f7081057d12c0cb50a84869bae03fb9d406037f0 Mon Sep 17 00:00:00 2001 From: David Forster Date: Fri, 24 Feb 2017 14:20:32 +0000 Subject: [PATCH 268/357] vti6: return GRE_KEY for vti6 [ Upstream commit 7dcdf941cdc96692ab99fd790c8cc68945514851 ] Align vti6 with vti by returning GRE_KEY flag. This enables iproute2 to display tunnel keys on "ip -6 tunnel show" Signed-off-by: David Forster Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_vti.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index c299c1e2bbf0..66c2b4b41793 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -691,6 +691,10 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p) u->link = p->link; u->i_key = p->i_key; u->o_key = p->o_key; + if (u->i_key) + u->i_flags |= GRE_KEY; + if (u->o_key) + u->o_flags |= GRE_KEY; u->proto = p->proto; memcpy(u->name, p->name, sizeof(u->name)); From a64407fafe09ca33b09aabd49c5d526b1b6b7528 Mon Sep 17 00:00:00 2001 From: Brian Russell Date: Fri, 24 Feb 2017 17:47:11 +0000 Subject: [PATCH 269/357] vxlan: don't allow overwrite of config src addr [ Upstream commit 1158632b5a2dcce0786c1b1b99654e81cc867981 ] When using IPv6 transport and a default dst, a pointer to the configured source address is passed into the route lookup. If no source address is configured, then the value is overwritten. IPv6 route lookup ignores egress ifindex match if the source address is set, so if egress ifindex match is desired, the source address must be passed as any. The overwrite breaks this for subsequent lookups. Avoid this by copying the configured address to an existing stack variable and pass a pointer to that instead. Fixes: 272d96a5ab10 ("net: vxlan: lwt: Use source ip address during route lookup.") Signed-off-by: Brian Russell Acked-by: Jiri Benc Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vxlan.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 7e3b2e27b244..ac3f8d55240f 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1942,7 +1942,6 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, const struct iphdr *old_iph; union vxlan_addr *dst; union vxlan_addr remote_ip, local_ip; - union vxlan_addr *src; struct vxlan_metadata _md; struct vxlan_metadata *md = &_md; __be16 src_port = 0, dst_port; @@ -1960,7 +1959,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port; vni = rdst->remote_vni; dst = &rdst->remote_ip; - src = &vxlan->cfg.saddr; + local_ip = vxlan->cfg.saddr; dst_cache = &rdst->dst_cache; } else { if (!info) { @@ -1979,7 +1978,6 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, local_ip.sin6.sin6_addr = info->key.u.ipv6.src; } dst = &remote_ip; - src = &local_ip; dst_cache = &info->dst_cache; } @@ -2028,7 +2026,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, rt = vxlan_get_route(vxlan, skb, rdst ? rdst->remote_ifindex : 0, tos, dst->sin.sin_addr.s_addr, - &src->sin.sin_addr.s_addr, + &local_ip.sin.sin_addr.s_addr, dst_cache, info); if (IS_ERR(rt)) { netdev_dbg(dev, "no route to %pI4\n", @@ -2071,7 +2069,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, if (err < 0) goto xmit_tx_error; - udp_tunnel_xmit_skb(rt, sk, skb, src->sin.sin_addr.s_addr, + udp_tunnel_xmit_skb(rt, sk, skb, local_ip.sin.sin_addr.s_addr, dst->sin.sin_addr.s_addr, tos, ttl, df, src_port, dst_port, xnet, !udp_sum); #if IS_ENABLED(CONFIG_IPV6) @@ -2087,7 +2085,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ndst = vxlan6_get_route(vxlan, skb, rdst ? rdst->remote_ifindex : 0, tos, label, &dst->sin6.sin6_addr, - &src->sin6.sin6_addr, + &local_ip.sin6.sin6_addr, dst_cache, info); if (IS_ERR(ndst)) { netdev_dbg(dev, "no route to %pI6\n", @@ -2134,7 +2132,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, return; } udp_tunnel6_xmit_skb(ndst, sk, skb, dev, - &src->sin6.sin6_addr, + &local_ip.sin6.sin6_addr, &dst->sin6.sin6_addr, tos, ttl, label, src_port, dst_port, !udp_sum); #endif From 36931eb015ee44fe2c8f220b46500b25f7282442 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 26 Feb 2017 17:14:35 +0200 Subject: [PATCH 270/357] ipv4: mask tos for input route [ Upstream commit 6e28099d38c0e50d62c1afc054e37e573adf3d21 ] Restore the lost masking of TOS in input route code to allow ip rules to match it properly. Problem [1] noticed by Shmulik Ladkani [1] http://marc.info/?t=137331755300040&r=1&w=2 Fixes: 89aef8921bfb ("ipv4: Delete routing cache.") Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/route.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d851cae27dac..17e6fbf30448 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1968,6 +1968,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, { int res; + tos &= IPTOS_RT_MASK; rcu_read_lock(); /* Multicast recognition logic is moved from route cache to here. From 063893e4ec88c253ff52a8e8486169fcf5e2d840 Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Fri, 24 Feb 2017 11:00:32 -0500 Subject: [PATCH 271/357] net sched actions: decrement module reference count after table flush. [ Upstream commit edb9d1bff4bbe19b8ae0e71b1f38732591a9eeb2 ] When tc actions are loaded as a module and no actions have been installed, flushing them would result in actions removed from the memory, but modules reference count not being decremented, so that the modules would not be unloaded. Following is example with GACT action: % sudo modprobe act_gact % lsmod Module Size Used by act_gact 16384 0 % % sudo tc actions ls action gact % % sudo tc actions flush action gact % lsmod Module Size Used by act_gact 16384 1 % sudo tc actions flush action gact % lsmod Module Size Used by act_gact 16384 2 % sudo rmmod act_gact rmmod: ERROR: Module act_gact is in use .... After the fix: % lsmod Module Size Used by act_gact 16384 0 % % sudo tc actions add action pass index 1 % sudo tc actions add action pass index 2 % sudo tc actions add action pass index 3 % lsmod Module Size Used by act_gact 16384 3 % % sudo tc actions flush action gact % lsmod Module Size Used by act_gact 16384 0 % % sudo tc actions flush action gact % lsmod Module Size Used by act_gact 16384 0 % sudo rmmod act_gact % lsmod Module Size Used by % Fixes: f97017cdefef ("net-sched: Fix actions flushing") Signed-off-by: Roman Mashak Signed-off-by: Jamal Hadi Salim Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_api.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index c6c2a93cc2a2..c651cfce9be6 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -820,10 +820,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, goto out_module_put; err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops); - if (err < 0) + if (err <= 0) goto out_module_put; - if (err == 0) - goto noflush_out; nla_nest_end(skb, nest); @@ -840,7 +838,6 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, out_module_put: module_put(ops->owner); err_out: -noflush_out: kfree_skb(skb); return err; } From 837786cbbb60f00770b02efd15546b36008b536e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Paul=20H=C3=BCber?= Date: Sun, 26 Feb 2017 17:58:19 +0100 Subject: [PATCH 272/357] l2tp: avoid use-after-free caused by l2tp_ip_backlog_recv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 51fb60eb162ab84c5edf2ae9c63cf0b878e5547e ] l2tp_ip_backlog_recv may not return -1 if the packet gets dropped. The return value is passed up to ip_local_deliver_finish, which treats negative values as an IP protocol number for resubmission. Signed-off-by: Paul Hüber Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_ip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index c0f0750639bd..ff750bb334fa 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -388,7 +388,7 @@ static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb) drop: IP_INC_STATS(sock_net(sk), IPSTATS_MIB_INDISCARDS); kfree_skb(skb); - return -1; + return 0; } /* Userspace will call sendmsg() on the tunnel socket to send L2TP From feaa5bab43350b95428dc01cf1f064305e72dd87 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 20 Jan 2017 15:31:52 -0800 Subject: [PATCH 273/357] net: phy: Avoid deadlock during phy_error() [ Upstream commit eab127717a6af54401ba534790c793ec143cd1fc ] phy_error() is called in the PHY state machine workqueue context, and calls phy_trigger_machine() which does a cancel_delayed_work_sync() of the workqueue we execute from, causing a deadlock situation. Augment phy_trigger_machine() machine with a sync boolean indicating whether we should use cancel_*_sync() or just cancel_*_work(). Fixes: 3c293f4e08b5 ("net: phy: Trigger state machine on state change and not polling.") Reported-by: Russell King Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index f424b867f73e..201ffa5fe4f7 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -611,14 +611,18 @@ void phy_start_machine(struct phy_device *phydev) * phy_trigger_machine - trigger the state machine to run * * @phydev: the phy_device struct + * @sync: indicate whether we should wait for the workqueue cancelation * * Description: There has been a change in state which requires that the * state machine runs. */ -static void phy_trigger_machine(struct phy_device *phydev) +static void phy_trigger_machine(struct phy_device *phydev, bool sync) { - cancel_delayed_work_sync(&phydev->state_queue); + if (sync) + cancel_delayed_work_sync(&phydev->state_queue); + else + cancel_delayed_work(&phydev->state_queue); queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, 0); } @@ -655,7 +659,7 @@ static void phy_error(struct phy_device *phydev) phydev->state = PHY_HALTED; mutex_unlock(&phydev->lock); - phy_trigger_machine(phydev); + phy_trigger_machine(phydev, false); } /** @@ -817,7 +821,7 @@ void phy_change(struct work_struct *work) } /* reschedule state queue work to run as soon as possible */ - phy_trigger_machine(phydev); + phy_trigger_machine(phydev, true); return; ignore: @@ -907,7 +911,7 @@ void phy_start(struct phy_device *phydev) if (do_resume) phy_resume(phydev); - phy_trigger_machine(phydev); + phy_trigger_machine(phydev, true); } EXPORT_SYMBOL(phy_start); From 0a40da4a74f1767c8ff7faffe86bcb06cae280ed Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 24 Feb 2017 11:43:36 -0800 Subject: [PATCH 274/357] vxlan: lock RCU on TX path [ Upstream commit 56de859e9967c070464a9a9f4f18d73f9447298e ] There is no guarantees that callers of the TX path will hold the RCU lock. Grab it explicitly. Fixes: c6fcc4fc5f8b ("vxlan: avoid using stale vxlan socket.") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vxlan.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index ac3f8d55240f..3c4c2cf6d444 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1955,6 +1955,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, info = skb_tunnel_info(skb); + rcu_read_lock(); if (rdst) { dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port; vni = rdst->remote_vni; @@ -1985,7 +1986,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, if (did_rsc) { /* short-circuited back to local bridge */ vxlan_encap_bypass(skb, vxlan, vxlan); - return; + goto out_unlock; } goto drop; } @@ -2054,7 +2055,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, if (!dst_vxlan) goto tx_error; vxlan_encap_bypass(skb, vxlan, dst_vxlan); - return; + goto out_unlock; } if (!info) @@ -2115,7 +2116,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, if (!dst_vxlan) goto tx_error; vxlan_encap_bypass(skb, vxlan, dst_vxlan); - return; + goto out_unlock; } if (!info) @@ -2129,7 +2130,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, if (err < 0) { dst_release(ndst); dev->stats.tx_errors++; - return; + goto out_unlock; } udp_tunnel6_xmit_skb(ndst, sk, skb, dev, &local_ip.sin6.sin6_addr, @@ -2137,7 +2138,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, label, src_port, dst_port, !udp_sum); #endif } - +out_unlock: + rcu_read_unlock(); return; drop: @@ -2153,6 +2155,7 @@ tx_error: dev->stats.tx_errors++; tx_free: dev_kfree_skb(skb); + rcu_read_unlock(); } /* Transmit local packets over Vxlan From 02595f472548738cd1ab3abf424277d6f489048f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 24 Feb 2017 11:43:37 -0800 Subject: [PATCH 275/357] geneve: lock RCU on TX path [ Upstream commit a717e3f740803cc88bd5c9a70c93504f6a368663 ] There is no guarantees that callers of the TX path will hold the RCU lock. Grab it explicitly. Fixes: fceb9c3e3825 ("geneve: avoid using stale geneve socket.") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/geneve.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 8b4822ad27cb..3c1f89ab0110 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1039,16 +1039,22 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); struct ip_tunnel_info *info = NULL; + int err; if (geneve->collect_md) info = skb_tunnel_info(skb); + rcu_read_lock(); #if IS_ENABLED(CONFIG_IPV6) if ((info && ip_tunnel_info_af(info) == AF_INET6) || (!info && geneve->remote.sa.sa_family == AF_INET6)) - return geneve6_xmit_skb(skb, dev, info); + err = geneve6_xmit_skb(skb, dev, info); + else #endif - return geneve_xmit_skb(skb, dev, info); + err = geneve_xmit_skb(skb, dev, info); + rcu_read_unlock(); + + return err; } static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool strict) From 8f4db60c7fb5120050309717123efcfd25a02034 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 28 Feb 2017 08:55:40 +0100 Subject: [PATCH 276/357] mlxsw: spectrum_router: Avoid potential packets loss [ Upstream commit f7df4923fa986247e93ec2cdff5ca168fff14dcf ] When the structure of the LPM tree changes (f.e., due to the addition of a new prefix), we unbind the old tree and then bind the new one. This may result in temporary packet loss. Instead, overwrite the old binding with the new one. Fixes: 6b75c4807db3 ("mlxsw: spectrum_router: Add virtual router management") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index e83072da6272..690563099313 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -500,30 +500,40 @@ static int mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, struct mlxsw_sp_prefix_usage *req_prefix_usage) { - struct mlxsw_sp_lpm_tree *lpm_tree; + struct mlxsw_sp_lpm_tree *lpm_tree = vr->lpm_tree; + struct mlxsw_sp_lpm_tree *new_tree; + int err; - if (mlxsw_sp_prefix_usage_eq(req_prefix_usage, - &vr->lpm_tree->prefix_usage)) + if (mlxsw_sp_prefix_usage_eq(req_prefix_usage, &lpm_tree->prefix_usage)) return 0; - lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage, + new_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage, vr->proto, false); - if (IS_ERR(lpm_tree)) { + if (IS_ERR(new_tree)) { /* We failed to get a tree according to the required * prefix usage. However, the current tree might be still good * for us if our requirement is subset of the prefixes used * in the tree. */ if (mlxsw_sp_prefix_usage_subset(req_prefix_usage, - &vr->lpm_tree->prefix_usage)) + &lpm_tree->prefix_usage)) return 0; - return PTR_ERR(lpm_tree); + return PTR_ERR(new_tree); } - mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr); - mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree); + /* Prevent packet loss by overwriting existing binding */ + vr->lpm_tree = new_tree; + err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr); + if (err) + goto err_tree_bind; + mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); + + return 0; + +err_tree_bind: vr->lpm_tree = lpm_tree; - return mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr); + mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree); + return err; } static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, From bbaeb9b73fa890dd7a6fa379a7e4822687643a4e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 Mar 2017 08:39:49 -0800 Subject: [PATCH 277/357] tcp/dccp: block BH for SYN processing [ Upstream commit 449809a66c1d0b1563dee84493e14bf3104d2d7e ] SYN processing really was meant to be handled from BH. When I got rid of BH blocking while processing socket backlog in commit 5413d1babe8f ("net: do not block BH while processing socket backlog"), I forgot that a malicious user could transition to TCP_LISTEN from a state that allowed (SYN) packets to be parked in the socket backlog while socket is owned by the thread doing the listen() call. Sure enough syzkaller found this and reported the bug ;) ================================= [ INFO: inconsistent lock state ] 4.10.0+ #60 Not tainted --------------------------------- inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage. syz-executor0/5090 [HC0[0]:SC0[0]:HE1:SE1] takes: (&(&hashinfo->ehash_locks[i])->rlock){+.?...}, at: [] spin_lock include/linux/spinlock.h:299 [inline] (&(&hashinfo->ehash_locks[i])->rlock){+.?...}, at: [] inet_ehash_insert+0x240/0xad0 net/ipv4/inet_hashtables.c:407 {IN-SOFTIRQ-W} state was registered at: mark_irqflags kernel/locking/lockdep.c:2923 [inline] __lock_acquire+0xbcf/0x3270 kernel/locking/lockdep.c:3295 lock_acquire+0x241/0x580 kernel/locking/lockdep.c:3753 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x33/0x50 kernel/locking/spinlock.c:151 spin_lock include/linux/spinlock.h:299 [inline] inet_ehash_insert+0x240/0xad0 net/ipv4/inet_hashtables.c:407 reqsk_queue_hash_req net/ipv4/inet_connection_sock.c:753 [inline] inet_csk_reqsk_queue_hash_add+0x1b7/0x2a0 net/ipv4/inet_connection_sock.c:764 tcp_conn_request+0x25cc/0x3310 net/ipv4/tcp_input.c:6399 tcp_v4_conn_request+0x157/0x220 net/ipv4/tcp_ipv4.c:1262 tcp_rcv_state_process+0x802/0x4130 net/ipv4/tcp_input.c:5889 tcp_v4_do_rcv+0x56b/0x940 net/ipv4/tcp_ipv4.c:1433 tcp_v4_rcv+0x2e12/0x3210 net/ipv4/tcp_ipv4.c:1711 ip_local_deliver_finish+0x4ce/0xc40 net/ipv4/ip_input.c:216 NF_HOOK include/linux/netfilter.h:257 [inline] ip_local_deliver+0x1ce/0x710 net/ipv4/ip_input.c:257 dst_input include/net/dst.h:492 [inline] ip_rcv_finish+0xb1d/0x2110 net/ipv4/ip_input.c:396 NF_HOOK include/linux/netfilter.h:257 [inline] ip_rcv+0xd90/0x19c0 net/ipv4/ip_input.c:487 __netif_receive_skb_core+0x1ad1/0x3400 net/core/dev.c:4179 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4217 netif_receive_skb_internal+0x1d6/0x430 net/core/dev.c:4245 napi_skb_finish net/core/dev.c:4602 [inline] napi_gro_receive+0x4e6/0x680 net/core/dev.c:4636 e1000_receive_skb drivers/net/ethernet/intel/e1000/e1000_main.c:4033 [inline] e1000_clean_rx_irq+0x5e0/0x1490 drivers/net/ethernet/intel/e1000/e1000_main.c:4489 e1000_clean+0xb9a/0x2910 drivers/net/ethernet/intel/e1000/e1000_main.c:3834 napi_poll net/core/dev.c:5171 [inline] net_rx_action+0xe70/0x1900 net/core/dev.c:5236 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284 invoke_softirq kernel/softirq.c:364 [inline] irq_exit+0x19e/0x1d0 kernel/softirq.c:405 exiting_irq arch/x86/include/asm/apic.h:658 [inline] do_IRQ+0x81/0x1a0 arch/x86/kernel/irq.c:250 ret_from_intr+0x0/0x20 native_safe_halt+0x6/0x10 arch/x86/include/asm/irqflags.h:53 arch_safe_halt arch/x86/include/asm/paravirt.h:98 [inline] default_idle+0x8f/0x410 arch/x86/kernel/process.c:271 arch_cpu_idle+0xa/0x10 arch/x86/kernel/process.c:262 default_idle_call+0x36/0x60 kernel/sched/idle.c:96 cpuidle_idle_call kernel/sched/idle.c:154 [inline] do_idle+0x348/0x440 kernel/sched/idle.c:243 cpu_startup_entry+0x18/0x20 kernel/sched/idle.c:345 start_secondary+0x344/0x440 arch/x86/kernel/smpboot.c:272 verify_cpu+0x0/0xfc irq event stamp: 1741 hardirqs last enabled at (1741): [] __raw_spin_unlock_irqrestore include/linux/spinlock_api_smp.h:160 [inline] hardirqs last enabled at (1741): [] _raw_spin_unlock_irqrestore+0xf7/0x1a0 kernel/locking/spinlock.c:191 hardirqs last disabled at (1740): [] __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:108 [inline] hardirqs last disabled at (1740): [] _raw_spin_lock_irqsave+0xa2/0x110 kernel/locking/spinlock.c:159 softirqs last enabled at (1738): [] __do_softirq+0x7cf/0xb7d kernel/softirq.c:310 softirqs last disabled at (1571): [] do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:902 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&hashinfo->ehash_locks[i])->rlock); lock(&(&hashinfo->ehash_locks[i])->rlock); *** DEADLOCK *** 1 lock held by syz-executor0/5090: #0: (sk_lock-AF_INET6){+.+.+.}, at: [] lock_sock include/net/sock.h:1460 [inline] #0: (sk_lock-AF_INET6){+.+.+.}, at: [] sock_setsockopt+0x233/0x1e40 net/core/sock.c:683 stack backtrace: CPU: 1 PID: 5090 Comm: syz-executor0 Not tainted 4.10.0+ #60 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x292/0x398 lib/dump_stack.c:51 print_usage_bug+0x3ef/0x450 kernel/locking/lockdep.c:2387 valid_state kernel/locking/lockdep.c:2400 [inline] mark_lock_irq kernel/locking/lockdep.c:2602 [inline] mark_lock+0xf30/0x1410 kernel/locking/lockdep.c:3065 mark_irqflags kernel/locking/lockdep.c:2941 [inline] __lock_acquire+0x6dc/0x3270 kernel/locking/lockdep.c:3295 lock_acquire+0x241/0x580 kernel/locking/lockdep.c:3753 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x33/0x50 kernel/locking/spinlock.c:151 spin_lock include/linux/spinlock.h:299 [inline] inet_ehash_insert+0x240/0xad0 net/ipv4/inet_hashtables.c:407 reqsk_queue_hash_req net/ipv4/inet_connection_sock.c:753 [inline] inet_csk_reqsk_queue_hash_add+0x1b7/0x2a0 net/ipv4/inet_connection_sock.c:764 dccp_v6_conn_request+0xada/0x11b0 net/dccp/ipv6.c:380 dccp_rcv_state_process+0x51e/0x1660 net/dccp/input.c:606 dccp_v6_do_rcv+0x213/0x350 net/dccp/ipv6.c:632 sk_backlog_rcv include/net/sock.h:896 [inline] __release_sock+0x127/0x3a0 net/core/sock.c:2052 release_sock+0xa5/0x2b0 net/core/sock.c:2539 sock_setsockopt+0x60f/0x1e40 net/core/sock.c:1016 SYSC_setsockopt net/socket.c:1782 [inline] SyS_setsockopt+0x2fb/0x3a0 net/socket.c:1765 entry_SYSCALL_64_fastpath+0x1f/0xc2 RIP: 0033:0x4458b9 RSP: 002b:00007fe8b26c2b58 EFLAGS: 00000292 ORIG_RAX: 0000000000000036 RAX: ffffffffffffffda RBX: 0000000000000006 RCX: 00000000004458b9 RDX: 000000000000001a RSI: 0000000000000001 RDI: 0000000000000006 RBP: 00000000006e2110 R08: 0000000000000010 R09: 0000000000000000 R10: 00000000208c3000 R11: 0000000000000292 R12: 0000000000708000 R13: 0000000020000000 R14: 0000000000001000 R15: 0000000000000000 Fixes: 5413d1babe8f ("net: do not block BH while processing socket backlog") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/input.c | 10 ++++++++-- net/ipv4/tcp_input.c | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/net/dccp/input.c b/net/dccp/input.c index 8fedc2d49770..4a05d7876850 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -577,6 +577,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct dccp_sock *dp = dccp_sk(sk); struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); const int old_state = sk->sk_state; + bool acceptable; int queued = 0; /* @@ -603,8 +604,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, */ if (sk->sk_state == DCCP_LISTEN) { if (dh->dccph_type == DCCP_PKT_REQUEST) { - if (inet_csk(sk)->icsk_af_ops->conn_request(sk, - skb) < 0) + /* It is possible that we process SYN packets from backlog, + * so we need to make sure to disable BH right there. + */ + local_bh_disable(); + acceptable = inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) >= 0; + local_bh_enable(); + if (!acceptable) return 1; consume_skb(skb); return 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c71d49ce0c93..ce42ded59958 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5916,9 +5916,15 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (th->syn) { if (th->fin) goto discard; - if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) - return 1; + /* It is possible that we process SYN packets from backlog, + * so we need to make sure to disable BH right there. + */ + local_bh_disable(); + acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0; + local_bh_enable(); + if (!acceptable) + return 1; consume_skb(skb); return 0; } From fa7c48fb3ad1744dab7a9a2783ae589bd87b732b Mon Sep 17 00:00:00 2001 From: Mike Manning Date: Wed, 1 Mar 2017 09:55:28 +0000 Subject: [PATCH 278/357] net: bridge: allow IPv6 when multicast flood is disabled [ Upstream commit 8953de2f02ad7b15e4964c82f9afd60f128e4e98 ] Even with multicast flooding turned off, IPv6 ND should still work so that IPv6 connectivity is provided. Allow this by continuing to flood multicast traffic originated by us. Fixes: b6cb5ac8331b ("net: bridge: add per-port multicast flood flag") Cc: Nikolay Aleksandrov Signed-off-by: Mike Manning Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_forward.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 7cb41aee4c82..8498e3503605 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -186,8 +186,9 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb, /* Do not flood unicast traffic to ports that turn it off */ if (pkt_type == BR_PKT_UNICAST && !(p->flags & BR_FLOOD)) continue; + /* Do not flood if mc off, except for traffic we originate */ if (pkt_type == BR_PKT_MULTICAST && - !(p->flags & BR_MCAST_FLOOD)) + !(p->flags & BR_MCAST_FLOOD) && skb->dev != br->dev) continue; /* Do not flood to ports that enable proxy ARP */ From 62fe0521fb6cc3b887fdc00ddca9c38d2614b8b0 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Wed, 1 Mar 2017 12:57:20 +0100 Subject: [PATCH 279/357] net: don't call strlen() on the user buffer in packet_bind_spkt() [ Upstream commit 540e2894f7905538740aaf122bd8e0548e1c34a4 ] KMSAN (KernelMemorySanitizer, a new error detection tool) reports use of uninitialized memory in packet_bind_spkt(): Acked-by: Eric Dumazet ================================================================== BUG: KMSAN: use of unitialized memory CPU: 0 PID: 1074 Comm: packet Not tainted 4.8.0-rc6+ #1891 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 0000000000000000 ffff88006b6dfc08 ffffffff82559ae8 ffff88006b6dfb48 ffffffff818a7c91 ffffffff85b9c870 0000000000000092 ffffffff85b9c550 0000000000000000 0000000000000092 00000000ec400911 0000000000000002 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0x238/0x290 lib/dump_stack.c:51 [] kmsan_report+0x276/0x2e0 mm/kmsan/kmsan.c:1003 [] __msan_warning+0x5b/0xb0 mm/kmsan/kmsan_instr.c:424 [< inline >] strlen lib/string.c:484 [] strlcpy+0x9d/0x200 lib/string.c:144 [] packet_bind_spkt+0x144/0x230 net/packet/af_packet.c:3132 [] SYSC_bind+0x40d/0x5f0 net/socket.c:1370 [] SyS_bind+0x82/0xa0 net/socket.c:1356 [] entry_SYSCALL_64_fastpath+0x13/0x8f arch/x86/entry/entry_64.o:? chained origin: 00000000eba00911 [] save_stack_trace+0x27/0x50 arch/x86/kernel/stacktrace.c:67 [< inline >] kmsan_save_stack_with_flags mm/kmsan/kmsan.c:322 [< inline >] kmsan_save_stack mm/kmsan/kmsan.c:334 [] kmsan_internal_chain_origin+0x118/0x1e0 mm/kmsan/kmsan.c:527 [] __msan_set_alloca_origin4+0xc3/0x130 mm/kmsan/kmsan_instr.c:380 [] SYSC_bind+0x129/0x5f0 net/socket.c:1356 [] SyS_bind+0x82/0xa0 net/socket.c:1356 [] entry_SYSCALL_64_fastpath+0x13/0x8f arch/x86/entry/entry_64.o:? origin description: ----address@SYSC_bind (origin=00000000eb400911) ================================================================== (the line numbers are relative to 4.8-rc6, but the bug persists upstream) , when I run the following program as root: ===================================== #include #include #include #include int main() { struct sockaddr addr; memset(&addr, 0xff, sizeof(addr)); addr.sa_family = AF_PACKET; int fd = socket(PF_PACKET, SOCK_PACKET, htons(ETH_P_ALL)); bind(fd, &addr, sizeof(addr)); return 0; } ===================================== This happens because addr.sa_data copied from the userspace is not zero-terminated, and copying it with strlcpy() in packet_bind_spkt() results in calling strlen() on the kernel copy of that non-terminated buffer. Signed-off-by: Alexander Potapenko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 34de326b4f09..f2b04a77258d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3140,7 +3140,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; - char name[15]; + char name[sizeof(uaddr->sa_data) + 1]; /* * Check legality @@ -3148,7 +3148,11 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, if (addr_len != sizeof(struct sockaddr)) return -EINVAL; - strlcpy(name, uaddr->sa_data, sizeof(name)); + /* uaddr->sa_data comes from the userspace, it's not guaranteed to be + * zero-terminated. + */ + memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data)); + name[sizeof(uaddr->sa_data)] = 0; return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); } From 3d87dce3dfd665a892d107b797e68697204c3e43 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 Mar 2017 14:28:39 -0800 Subject: [PATCH 280/357] net: net_enable_timestamp() can be called from irq contexts [ Upstream commit 13baa00ad01bb3a9f893e3a08cbc2d072fc0c15d ] It is now very clear that silly TCP listeners might play with enabling/disabling timestamping while new children are added to their accept queue. Meaning net_enable_timestamp() can be called from BH context while current state of the static key is not enabled. Lets play safe and allow all contexts. The work queue is scheduled only under the problematic cases, which are the static key enable/disable transition, to not slow down critical paths. This extends and improves what we did in commit 5fa8bbda38c6 ("net: use a work queue to defer net_disable_timestamp() work") Fixes: b90e5794c5bd ("net: dont call jump_label_dec from irq context") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 60b0a6049e72..2e04fd188081 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1697,27 +1697,54 @@ EXPORT_SYMBOL_GPL(net_dec_egress_queue); static struct static_key netstamp_needed __read_mostly; #ifdef HAVE_JUMP_LABEL static atomic_t netstamp_needed_deferred; +static atomic_t netstamp_wanted; static void netstamp_clear(struct work_struct *work) { int deferred = atomic_xchg(&netstamp_needed_deferred, 0); + int wanted; - while (deferred--) - static_key_slow_dec(&netstamp_needed); + wanted = atomic_add_return(deferred, &netstamp_wanted); + if (wanted > 0) + static_key_enable(&netstamp_needed); + else + static_key_disable(&netstamp_needed); } static DECLARE_WORK(netstamp_work, netstamp_clear); #endif void net_enable_timestamp(void) { +#ifdef HAVE_JUMP_LABEL + int wanted; + + while (1) { + wanted = atomic_read(&netstamp_wanted); + if (wanted <= 0) + break; + if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted + 1) == wanted) + return; + } + atomic_inc(&netstamp_needed_deferred); + schedule_work(&netstamp_work); +#else static_key_slow_inc(&netstamp_needed); +#endif } EXPORT_SYMBOL(net_enable_timestamp); void net_disable_timestamp(void) { #ifdef HAVE_JUMP_LABEL - /* net_disable_timestamp() can be called from non process context */ - atomic_inc(&netstamp_needed_deferred); + int wanted; + + while (1) { + wanted = atomic_read(&netstamp_wanted); + if (wanted <= 1) + break; + if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted - 1) == wanted) + return; + } + atomic_dec(&netstamp_needed_deferred); schedule_work(&netstamp_work); #else static_key_slow_dec(&netstamp_needed); From 0bcc319d554cbc2414b721d773b775b3cade9bd8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 Mar 2017 14:45:06 -0800 Subject: [PATCH 281/357] ipv6: orphan skbs in reassembly unit [ Upstream commit 48cac18ecf1de82f76259a54402c3adb7839ad01 ] Andrey reported a use-after-free in IPv6 stack. Issue here is that we free the socket while it still has skb in TX path and in some queues. It happens here because IPv6 reassembly unit messes skb->truesize, breaking skb_set_owner_w() badly. We fixed a similar issue for IPV4 in commit 8282f27449bf ("inet: frag: Always orphan skbs inside ip_defrag()") Acked-by: Joe Stringer ================================================================== BUG: KASAN: use-after-free in sock_wfree+0x118/0x120 Read of size 8 at addr ffff880062da0060 by task a.out/4140 page:ffffea00018b6800 count:1 mapcount:0 mapping: (null) index:0x0 compound_mapcount: 0 flags: 0x100000000008100(slab|head) raw: 0100000000008100 0000000000000000 0000000000000000 0000000180130013 raw: dead000000000100 dead000000000200 ffff88006741f140 0000000000000000 page dumped because: kasan: bad access detected CPU: 0 PID: 4140 Comm: a.out Not tainted 4.10.0-rc3+ #59 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 dump_stack+0x292/0x398 lib/dump_stack.c:51 describe_address mm/kasan/report.c:262 kasan_report_error+0x121/0x560 mm/kasan/report.c:370 kasan_report mm/kasan/report.c:392 __asan_report_load8_noabort+0x3e/0x40 mm/kasan/report.c:413 sock_flag ./arch/x86/include/asm/bitops.h:324 sock_wfree+0x118/0x120 net/core/sock.c:1631 skb_release_head_state+0xfc/0x250 net/core/skbuff.c:655 skb_release_all+0x15/0x60 net/core/skbuff.c:668 __kfree_skb+0x15/0x20 net/core/skbuff.c:684 kfree_skb+0x16e/0x4e0 net/core/skbuff.c:705 inet_frag_destroy+0x121/0x290 net/ipv4/inet_fragment.c:304 inet_frag_put ./include/net/inet_frag.h:133 nf_ct_frag6_gather+0x1125/0x38b0 net/ipv6/netfilter/nf_conntrack_reasm.c:617 ipv6_defrag+0x21b/0x350 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68 nf_hook_entry_hookfn ./include/linux/netfilter.h:102 nf_hook_slow+0xc3/0x290 net/netfilter/core.c:310 nf_hook ./include/linux/netfilter.h:212 __ip6_local_out+0x52c/0xaf0 net/ipv6/output_core.c:160 ip6_local_out+0x2d/0x170 net/ipv6/output_core.c:170 ip6_send_skb+0xa1/0x340 net/ipv6/ip6_output.c:1722 ip6_push_pending_frames+0xb3/0xe0 net/ipv6/ip6_output.c:1742 rawv6_push_pending_frames net/ipv6/raw.c:613 rawv6_sendmsg+0x2cff/0x4130 net/ipv6/raw.c:927 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 sock_sendmsg_nosec net/socket.c:635 sock_sendmsg+0xca/0x110 net/socket.c:645 sock_write_iter+0x326/0x620 net/socket.c:848 new_sync_write fs/read_write.c:499 __vfs_write+0x483/0x760 fs/read_write.c:512 vfs_write+0x187/0x530 fs/read_write.c:560 SYSC_write fs/read_write.c:607 SyS_write+0xfb/0x230 fs/read_write.c:599 entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 RIP: 0033:0x7ff26e6f5b79 RSP: 002b:00007ff268e0ed98 EFLAGS: 00000206 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00007ff268e0f9c0 RCX: 00007ff26e6f5b79 RDX: 0000000000000010 RSI: 0000000020f50fe1 RDI: 0000000000000003 RBP: 00007ff26ebc1220 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000206 R12: 0000000000000000 R13: 00007ff268e0f9c0 R14: 00007ff26efec040 R15: 0000000000000003 The buggy address belongs to the object at ffff880062da0000 which belongs to the cache RAWv6 of size 1504 The buggy address ffff880062da0060 is located 96 bytes inside of 1504-byte region [ffff880062da0000, ffff880062da05e0) Freed by task 4113: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 kasan_slab_free+0x73/0xc0 mm/kasan/kasan.c:578 slab_free_hook mm/slub.c:1352 slab_free_freelist_hook mm/slub.c:1374 slab_free mm/slub.c:2951 kmem_cache_free+0xb2/0x2c0 mm/slub.c:2973 sk_prot_free net/core/sock.c:1377 __sk_destruct+0x49c/0x6e0 net/core/sock.c:1452 sk_destruct+0x47/0x80 net/core/sock.c:1460 __sk_free+0x57/0x230 net/core/sock.c:1468 sk_free+0x23/0x30 net/core/sock.c:1479 sock_put ./include/net/sock.h:1638 sk_common_release+0x31e/0x4e0 net/core/sock.c:2782 rawv6_close+0x54/0x80 net/ipv6/raw.c:1214 inet_release+0xed/0x1c0 net/ipv4/af_inet.c:425 inet6_release+0x50/0x70 net/ipv6/af_inet6.c:431 sock_release+0x8d/0x1e0 net/socket.c:599 sock_close+0x16/0x20 net/socket.c:1063 __fput+0x332/0x7f0 fs/file_table.c:208 ____fput+0x15/0x20 fs/file_table.c:244 task_work_run+0x19b/0x270 kernel/task_work.c:116 exit_task_work ./include/linux/task_work.h:21 do_exit+0x186b/0x2800 kernel/exit.c:839 do_group_exit+0x149/0x420 kernel/exit.c:943 SYSC_exit_group kernel/exit.c:954 SyS_exit_group+0x1d/0x20 kernel/exit.c:952 entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 Allocated by task 4115: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:605 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:544 slab_post_alloc_hook mm/slab.h:432 slab_alloc_node mm/slub.c:2708 slab_alloc mm/slub.c:2716 kmem_cache_alloc+0x1af/0x250 mm/slub.c:2721 sk_prot_alloc+0x65/0x2a0 net/core/sock.c:1334 sk_alloc+0x105/0x1010 net/core/sock.c:1396 inet6_create+0x44d/0x1150 net/ipv6/af_inet6.c:183 __sock_create+0x4f6/0x880 net/socket.c:1199 sock_create net/socket.c:1239 SYSC_socket net/socket.c:1269 SyS_socket+0xf9/0x230 net/socket.c:1249 entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 Memory state around the buggy address: ffff880062d9ff00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff880062d9ff80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff880062da0000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff880062da0080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff880062da0100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Reported-by: Andrey Konovalov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/netfilter/nf_conntrack_reasm.c | 1 + net/openvswitch/conntrack.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 9948b5ce52da..986d4ca38832 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -589,6 +589,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); + skb_orphan(skb); fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); if (fq == NULL) { diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index eab210bb1ef0..48386bff8b4e 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -367,7 +367,6 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, } else if (key->eth.type == htons(ETH_P_IPV6)) { enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; - skb_orphan(skb); memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); err = nf_ct_frag6_gather(net, skb, user); if (err) { From 51ae1fbcf17325d47c20f65fb968be2d2d12fed0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 1 Mar 2017 16:35:07 -0300 Subject: [PATCH 282/357] dccp: Unlock sock before calling sk_free() [ Upstream commit d5afb6f9b6bb2c57bd0c05e76e12489dc0d037d9 ] The code where sk_clone() came from created a new socket and locked it, but then, on the error path didn't unlock it. This problem stayed there for a long while, till b0691c8ee7c2 ("net: Unlock sock before calling sk_free()") fixed it, but unfortunately the callers of sk_clone() (now sk_clone_locked()) were not audited and the one in dccp_create_openreq_child() remained. Now in the age of the syskaller fuzzer, this was finally uncovered, as reported by Dmitry: ---- 8< ---- I've got the following report while running syzkaller fuzzer on 86292b33d4b7 ("Merge branch 'akpm' (patches from Andrew)") [ BUG: held lock freed! ] 4.10.0+ #234 Not tainted ------------------------- syz-executor6/6898 is freeing memory ffff88006286cac0-ffff88006286d3b7, with a lock still held there! (slock-AF_INET6){+.-...}, at: [] spin_lock include/linux/spinlock.h:299 [inline] (slock-AF_INET6){+.-...}, at: [] sk_clone_lock+0x3d9/0x12c0 net/core/sock.c:1504 5 locks held by syz-executor6/6898: #0: (sk_lock-AF_INET6){+.+.+.}, at: [] lock_sock include/net/sock.h:1460 [inline] #0: (sk_lock-AF_INET6){+.+.+.}, at: [] inet_stream_connect+0x44/0xa0 net/ipv4/af_inet.c:681 #1: (rcu_read_lock){......}, at: [] inet6_csk_xmit+0x12a/0x5d0 net/ipv6/inet6_connection_sock.c:126 #2: (rcu_read_lock){......}, at: [] __skb_unlink include/linux/skbuff.h:1767 [inline] #2: (rcu_read_lock){......}, at: [] __skb_dequeue include/linux/skbuff.h:1783 [inline] #2: (rcu_read_lock){......}, at: [] process_backlog+0x264/0x730 net/core/dev.c:4835 #3: (rcu_read_lock){......}, at: [] ip6_input_finish+0x0/0x1700 net/ipv6/ip6_input.c:59 #4: (slock-AF_INET6){+.-...}, at: [] spin_lock include/linux/spinlock.h:299 [inline] #4: (slock-AF_INET6){+.-...}, at: [] sk_clone_lock+0x3d9/0x12c0 net/core/sock.c:1504 Fix it just like was done by b0691c8ee7c2 ("net: Unlock sock before calling sk_free()"). Reported-by: Dmitry Vyukov Cc: Cong Wang Cc: Eric Dumazet Cc: Gerrit Renker Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170301153510.GE15145@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/minisocks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 53eddf99e4f6..d20d948a98ed 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -122,6 +122,7 @@ struct sock *dccp_create_openreq_child(const struct sock *sk, /* It is still raw copy of parent, so invalidate * destructor and make plain sk_free() */ newsk->sk_destruct = NULL; + bh_unlock_sock(newsk); sk_free(newsk); return NULL; } From 4547f03d1a624d7e92c65077b7cb5b2a33839cb3 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 3 Mar 2017 12:21:14 -0800 Subject: [PATCH 283/357] strparser: destroy workqueue on module exit [ Upstream commit f78ef7cd9a0686b979679d0de061c6dbfd8d649e ] Fixes: 43a0c6751a32 ("strparser: Stream parser for messages") Cc: Tom Herbert Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/strparser/strparser.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 41adf362936d..b5c279b22680 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -504,6 +504,7 @@ static int __init strp_mod_init(void) static void __exit strp_mod_exit(void) { + destroy_workqueue(strp_wq); } module_init(strp_mod_init); module_exit(strp_mod_exit); From 07753bc6a2816c1c3b9f7bff133251f623c7bc91 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Mar 2017 14:08:21 -0800 Subject: [PATCH 284/357] tcp: fix various issues for sockets morphing to listen state [ Upstream commit 02b2faaf0af1d85585f6d6980e286d53612acfc2 ] Dmitry Vyukov reported a divide by 0 triggered by syzkaller, exploiting tcp_disconnect() path that was never really considered and/or used before syzkaller ;) I was not able to reproduce the bug, but it seems issues here are the three possible actions that assumed they would never trigger on a listener. 1) tcp_write_timer_handler 2) tcp_delack_timer_handler 3) MTU reduction Only IPv6 MTU reduction was properly testing TCP_CLOSE and TCP_LISTEN states from tcp_v6_mtu_reduced() Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_ipv4.c | 7 +++++-- net/ipv4/tcp_timer.c | 6 ++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2259114c7242..5bb809f12ba1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -269,10 +269,13 @@ EXPORT_SYMBOL(tcp_v4_connect); */ void tcp_v4_mtu_reduced(struct sock *sk) { - struct dst_entry *dst; struct inet_sock *inet = inet_sk(sk); - u32 mtu = tcp_sk(sk)->mtu_info; + struct dst_entry *dst; + u32 mtu; + if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) + return; + mtu = tcp_sk(sk)->mtu_info; dst = inet_csk_update_pmtu(sk, mtu); if (!dst) return; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 3ea1cf804748..b1e65b3b4361 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -249,7 +249,8 @@ void tcp_delack_timer_handler(struct sock *sk) sk_mem_reclaim_partial(sk); - if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) + if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || + !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) goto out; if (time_after(icsk->icsk_ack.timeout, jiffies)) { @@ -552,7 +553,8 @@ void tcp_write_timer_handler(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); int event; - if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending) + if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || + !icsk->icsk_pending) goto out; if (time_after(icsk->icsk_timeout, jiffies)) { From 98fa3d2a8e399c40f681bca83f8ab522657c16db Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Mar 2017 21:01:02 -0800 Subject: [PATCH 285/357] net: fix socket refcounting in skb_complete_wifi_ack() [ Upstream commit dd4f10722aeb10f4f582948839f066bebe44e5fb ] TX skbs do not necessarily hold a reference on skb->sk->sk_refcnt By the time TX completion happens, sk_refcnt might be already 0. sock_hold()/sock_put() would then corrupt critical state, like sk_wmem_alloc. Fixes: bf7fa551e0ce ("mac80211: Resolve sk_refcnt/sk_wmem_alloc issue in wifi ack path") Signed-off-by: Eric Dumazet Cc: Alexander Duyck Cc: Johannes Berg Cc: Soheil Hassas Yeganeh Cc: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1e3e0087245b..b806677f9fd3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3871,7 +3871,7 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) { struct sock *sk = skb->sk; struct sock_exterr_skb *serr; - int err; + int err = 1; skb->wifi_acked_valid = 1; skb->wifi_acked = acked; @@ -3881,14 +3881,15 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS; - /* take a reference to prevent skb_orphan() from freeing the socket */ - sock_hold(sk); - - err = sock_queue_err_skb(sk, skb); + /* Take a reference to prevent skb_orphan() from freeing the socket, + * but only if the socket refcount is not zero. + */ + if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { + err = sock_queue_err_skb(sk, skb); + sock_put(sk); + } if (err) kfree_skb(skb); - - sock_put(sk); } EXPORT_SYMBOL_GPL(skb_complete_wifi_ack); From f157cc1d7251403c950d960e3bcf988c14d9deda Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Mar 2017 21:01:03 -0800 Subject: [PATCH 286/357] net: fix socket refcounting in skb_complete_tx_timestamp() [ Upstream commit 9ac25fc063751379cb77434fef9f3b088cd3e2f7 ] TX skbs do not necessarily hold a reference on skb->sk->sk_refcnt By the time TX completion happens, sk_refcnt might be already 0. sock_hold()/sock_put() would then corrupt critical state, like sk_wmem_alloc and lead to leaks or use after free. Fixes: 62bccb8cdb69 ("net-timestamp: Make the clone operation stand-alone from phy timestamping") Signed-off-by: Eric Dumazet Cc: Alexander Duyck Cc: Johannes Berg Cc: Soheil Hassas Yeganeh Cc: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b806677f9fd3..f0f462c0573d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3814,13 +3814,14 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, if (!skb_may_tx_timestamp(sk, false)) return; - /* take a reference to prevent skb_orphan() from freeing the socket */ - sock_hold(sk); - - *skb_hwtstamps(skb) = *hwtstamps; - __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); - - sock_put(sk); + /* Take a reference to prevent skb_orphan() from freeing the socket, + * but only if the socket refcount is not zero. + */ + if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { + *skb_hwtstamps(skb) = *hwtstamps; + __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); + sock_put(sk); + } } EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); From 5f79aab41dedaa869ef3c706e4c0872318665884 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sun, 5 Mar 2017 03:01:55 +0300 Subject: [PATCH 287/357] net/sched: act_skbmod: remove unneeded rcu_read_unlock in tcf_skbmod_dump [ Upstream commit 6c4dc75c251721f517e9daeb5370ea606b5b35ce ] Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_skbmod.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index e7d96381c908..f85313d60a4d 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -228,7 +228,6 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, return skb->len; nla_put_failure: - rcu_read_unlock(); nlmsg_trim(skb, b); return -1; } From 7c0eaeec84d14d4b85773ccac8bffe8b2eafea7c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 5 Mar 2017 10:52:16 -0800 Subject: [PATCH 288/357] dccp: fix use-after-free in dccp_feat_activate_values [ Upstream commit 62f8f4d9066c1c6f2474845d1ca7e2891f2ae3fd ] Dmitry reported crashes in DCCP stack [1] Problem here is that when I got rid of listener spinlock, I missed the fact that DCCP stores a complex state in struct dccp_request_sock, while TCP does not. Since multiple cpus could access it at the same time, we need to add protection. [1] BUG: KASAN: use-after-free in dccp_feat_activate_values+0x967/0xab0 net/dccp/feat.c:1541 at addr ffff88003713be68 Read of size 8 by task syz-executor2/8457 CPU: 2 PID: 8457 Comm: syz-executor2 Not tainted 4.10.0-rc7+ #127 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x292/0x398 lib/dump_stack.c:51 kasan_object_err+0x1c/0x70 mm/kasan/report.c:162 print_address_description mm/kasan/report.c:200 [inline] kasan_report_error mm/kasan/report.c:289 [inline] kasan_report.part.1+0x20e/0x4e0 mm/kasan/report.c:311 kasan_report mm/kasan/report.c:332 [inline] __asan_report_load8_noabort+0x29/0x30 mm/kasan/report.c:332 dccp_feat_activate_values+0x967/0xab0 net/dccp/feat.c:1541 dccp_create_openreq_child+0x464/0x610 net/dccp/minisocks.c:121 dccp_v6_request_recv_sock+0x1f6/0x1960 net/dccp/ipv6.c:457 dccp_check_req+0x335/0x5a0 net/dccp/minisocks.c:186 dccp_v6_rcv+0x69e/0x1d00 net/dccp/ipv6.c:711 ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279 NF_HOOK include/linux/netfilter.h:257 [inline] ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322 dst_input include/net/dst.h:507 [inline] ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69 NF_HOOK include/linux/netfilter.h:257 [inline] ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228 process_backlog+0xe5/0x6c0 net/core/dev.c:4839 napi_poll net/core/dev.c:5202 [inline] net_rx_action+0xe70/0x1900 net/core/dev.c:5267 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284 do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:902 do_softirq.part.17+0x1e8/0x230 kernel/softirq.c:328 do_softirq kernel/softirq.c:176 [inline] __local_bh_enable_ip+0x1f2/0x200 kernel/softirq.c:181 local_bh_enable include/linux/bottom_half.h:31 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:971 [inline] ip6_finish_output2+0xbb0/0x23d0 net/ipv6/ip6_output.c:123 ip6_finish_output+0x302/0x960 net/ipv6/ip6_output.c:148 NF_HOOK_COND include/linux/netfilter.h:246 [inline] ip6_output+0x1cb/0x8d0 net/ipv6/ip6_output.c:162 ip6_xmit+0xcdf/0x20d0 include/net/dst.h:501 inet6_csk_xmit+0x320/0x5f0 net/ipv6/inet6_connection_sock.c:179 dccp_transmit_skb+0xb09/0x1120 net/dccp/output.c:141 dccp_xmit_packet+0x215/0x760 net/dccp/output.c:280 dccp_write_xmit+0x168/0x1d0 net/dccp/output.c:362 dccp_sendmsg+0x79c/0xb10 net/dccp/proto.c:796 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 sock_sendmsg_nosec net/socket.c:635 [inline] sock_sendmsg+0xca/0x110 net/socket.c:645 SYSC_sendto+0x660/0x810 net/socket.c:1687 SyS_sendto+0x40/0x50 net/socket.c:1655 entry_SYSCALL_64_fastpath+0x1f/0xc2 RIP: 0033:0x4458b9 RSP: 002b:00007f8ceb77bb58 EFLAGS: 00000282 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000017 RCX: 00000000004458b9 RDX: 0000000000000023 RSI: 0000000020e60000 RDI: 0000000000000017 RBP: 00000000006e1b90 R08: 00000000200f9fe1 R09: 0000000000000020 R10: 0000000000008010 R11: 0000000000000282 R12: 00000000007080a8 R13: 0000000000000000 R14: 00007f8ceb77c9c0 R15: 00007f8ceb77c700 Object at ffff88003713be50, in cache kmalloc-64 size: 64 Allocated: PID = 8446 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 [inline] kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:605 kmem_cache_alloc_trace+0x82/0x270 mm/slub.c:2738 kmalloc include/linux/slab.h:490 [inline] dccp_feat_entry_new+0x214/0x410 net/dccp/feat.c:467 dccp_feat_push_change+0x38/0x220 net/dccp/feat.c:487 __feat_register_sp+0x223/0x2f0 net/dccp/feat.c:741 dccp_feat_propagate_ccid+0x22b/0x2b0 net/dccp/feat.c:949 dccp_feat_server_ccid_dependencies+0x1b3/0x250 net/dccp/feat.c:1012 dccp_make_response+0x1f1/0xc90 net/dccp/output.c:423 dccp_v6_send_response+0x4ec/0xc20 net/dccp/ipv6.c:217 dccp_v6_conn_request+0xaba/0x11b0 net/dccp/ipv6.c:377 dccp_rcv_state_process+0x51e/0x1650 net/dccp/input.c:606 dccp_v6_do_rcv+0x213/0x350 net/dccp/ipv6.c:632 sk_backlog_rcv include/net/sock.h:893 [inline] __sk_receive_skb+0x36f/0xcc0 net/core/sock.c:479 dccp_v6_rcv+0xba5/0x1d00 net/dccp/ipv6.c:742 ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279 NF_HOOK include/linux/netfilter.h:257 [inline] ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322 dst_input include/net/dst.h:507 [inline] ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69 NF_HOOK include/linux/netfilter.h:257 [inline] ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228 process_backlog+0xe5/0x6c0 net/core/dev.c:4839 napi_poll net/core/dev.c:5202 [inline] net_rx_action+0xe70/0x1900 net/core/dev.c:5267 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284 Freed: PID = 15 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 [inline] kasan_slab_free+0x73/0xc0 mm/kasan/kasan.c:578 slab_free_hook mm/slub.c:1355 [inline] slab_free_freelist_hook mm/slub.c:1377 [inline] slab_free mm/slub.c:2954 [inline] kfree+0xe8/0x2b0 mm/slub.c:3874 dccp_feat_entry_destructor.part.4+0x48/0x60 net/dccp/feat.c:418 dccp_feat_entry_destructor net/dccp/feat.c:416 [inline] dccp_feat_list_pop net/dccp/feat.c:541 [inline] dccp_feat_activate_values+0x57f/0xab0 net/dccp/feat.c:1543 dccp_create_openreq_child+0x464/0x610 net/dccp/minisocks.c:121 dccp_v6_request_recv_sock+0x1f6/0x1960 net/dccp/ipv6.c:457 dccp_check_req+0x335/0x5a0 net/dccp/minisocks.c:186 dccp_v6_rcv+0x69e/0x1d00 net/dccp/ipv6.c:711 ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279 NF_HOOK include/linux/netfilter.h:257 [inline] ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322 dst_input include/net/dst.h:507 [inline] ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69 NF_HOOK include/linux/netfilter.h:257 [inline] ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228 process_backlog+0xe5/0x6c0 net/core/dev.c:4839 napi_poll net/core/dev.c:5202 [inline] net_rx_action+0xe70/0x1900 net/core/dev.c:5267 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284 Memory state around the buggy address: ffff88003713bd00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88003713bd80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88003713be00: fc fc fc fc fc fc fc fc fc fc fb fb fb fb fb fb ^ Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/dccp.h | 1 + net/dccp/minisocks.c | 24 ++++++++++++++++-------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 61d042bbbf60..68449293c4b6 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -163,6 +163,7 @@ struct dccp_request_sock { __u64 dreq_isr; __u64 dreq_gsr; __be32 dreq_service; + spinlock_t dreq_lock; struct list_head dreq_featneg; __u32 dreq_timestamp_echo; __u32 dreq_timestamp_time; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index d20d948a98ed..39e7e2bca8db 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -146,6 +146,13 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, struct dccp_request_sock *dreq = dccp_rsk(req); bool own_req; + /* TCP/DCCP listeners became lockless. + * DCCP stores complex state in its request_sock, so we need + * a protection for them, now this code runs without being protected + * by the parent (listener) lock. + */ + spin_lock_bh(&dreq->dreq_lock); + /* Check for retransmitted REQUEST */ if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { @@ -160,7 +167,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, inet_rtx_syn_ack(sk, req); } /* Network Duplicate, discard packet */ - return NULL; + goto out; } DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; @@ -186,20 +193,20 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, req, &own_req); - if (!child) - goto listen_overflow; + if (child) { + child = inet_csk_complete_hashdance(sk, child, req, own_req); + goto out; + } - return inet_csk_complete_hashdance(sk, child, req, own_req); - -listen_overflow: - dccp_pr_debug("listen_overflow!\n"); DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; drop: if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) req->rsk_ops->send_reset(sk, skb); inet_csk_reqsk_queue_drop(sk, req); - return NULL; +out: + spin_unlock_bh(&dreq->dreq_lock); + return child; } EXPORT_SYMBOL_GPL(dccp_check_req); @@ -250,6 +257,7 @@ int dccp_reqsk_init(struct request_sock *req, { struct dccp_request_sock *dreq = dccp_rsk(req); + spin_lock_init(&dreq->dreq_lock); inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport; inet_rsk(req)->ir_num = ntohs(dccp_hdr(skb)->dccph_dport); inet_rsk(req)->acked = 0; From db6e7796186a8d9a41e9d14b4a824c9cb80aaaf4 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 6 Mar 2017 08:53:04 -0800 Subject: [PATCH 289/357] vrf: Fix use-after-free in vrf_xmit [ Upstream commit f7887d40e541f74402df0684a1463c0a0bb68c68 ] KASAN detected a use-after-free: [ 269.467067] BUG: KASAN: use-after-free in vrf_xmit+0x7f1/0x827 [vrf] at addr ffff8800350a21c0 [ 269.467067] Read of size 4 by task ssh/1879 [ 269.467067] CPU: 1 PID: 1879 Comm: ssh Not tainted 4.10.0+ #249 [ 269.467067] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014 [ 269.467067] Call Trace: [ 269.467067] dump_stack+0x81/0xb6 [ 269.467067] kasan_object_err+0x21/0x78 [ 269.467067] kasan_report+0x2f7/0x450 [ 269.467067] ? vrf_xmit+0x7f1/0x827 [vrf] [ 269.467067] ? ip_output+0xa4/0xdb [ 269.467067] __asan_load4+0x6b/0x6d [ 269.467067] vrf_xmit+0x7f1/0x827 [vrf] ... Which corresponds to the skb access after xmit handling. Fix by saving skb->len and using the saved value to update stats. Fixes: 193125dbd8eb2 ("net: Introduce VRF device driver") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vrf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 95cf1d844781..bc744acabf98 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -346,6 +346,7 @@ static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev) static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) { + int len = skb->len; netdev_tx_t ret = is_ip_tx_frame(skb, dev); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { @@ -353,7 +354,7 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) u64_stats_update_begin(&dstats->syncp); dstats->tx_pkts++; - dstats->tx_bytes += skb->len; + dstats->tx_bytes += len; u64_stats_update_end(&dstats->syncp); } else { this_cpu_inc(dev->dstats->tx_drps); From b07eed8f7119d43cbfe4d2d66241bd04099e6585 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 7 Mar 2017 18:33:31 +0100 Subject: [PATCH 290/357] net/tunnel: set inner protocol in network gro hooks [ Upstream commit 294acf1c01bace5cea5d30b510504238bf5f7c25 ] The gso code of several tunnels type (gre and udp tunnels) takes for granted that the skb->inner_protocol is properly initialized and drops the packet elsewhere. On the forwarding path no one is initializing such field, so gro encapsulated packets are dropped on forward. Since commit 38720352412a ("gre: Use inner_proto to obtain inner header protocol"), this can be reproduced when the encapsulated packets use gre as the tunneling protocol. The issue happens also with vxlan and geneve tunnels since commit 8bce6d7d0d1e ("udp: Generalize skb_udp_segment"), if the forwarding host's ingress nic has h/w offload for such tunnel and a vxlan/geneve device is configured on top of it, regardless of the configured peer address and vni. To address the issue, this change initialize the inner_protocol field for encapsulated packets in both ipv4 and ipv6 gro complete callbacks. Fixes: 38720352412a ("gre: Use inner_proto to obtain inner header protocol") Fixes: 8bce6d7d0d1e ("udp: Generalize skb_udp_segment") Signed-off-by: Paolo Abeni Acked-by: Alexander Duyck Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/af_inet.c | 4 +++- net/ipv6/ip6_offload.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 215143246e4b..971b9471d427 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1460,8 +1460,10 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff) int proto = iph->protocol; int err = -ENOSYS; - if (skb->encapsulation) + if (skb->encapsulation) { + skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IP)); skb_set_inner_network_header(skb, nhoff); + } csum_replace2(&iph->check, iph->tot_len, newlen); iph->tot_len = newlen; diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index fc7b4017ba24..33b04ec2744a 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -294,8 +294,10 @@ static int ipv6_gro_complete(struct sk_buff *skb, int nhoff) struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff); int err = -ENOSYS; - if (skb->encapsulation) + if (skb->encapsulation) { + skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IPV6)); skb_set_inner_network_header(skb, nhoff); + } iph->payload_len = htons(skb->len - nhoff - sizeof(*iph)); From ccb65adc6ca6f1cf80cae474a4b8c8f6e6c36168 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Tue, 7 Mar 2017 23:50:50 +0300 Subject: [PATCH 291/357] uapi: fix linux/packet_diag.h userspace compilation error [ Upstream commit 745cb7f8a5de0805cade3de3991b7a95317c7c73 ] Replace MAX_ADDR_LEN with its numeric value to fix the following linux/packet_diag.h userspace compilation error: /usr/include/linux/packet_diag.h:67:17: error: 'MAX_ADDR_LEN' undeclared here (not in a function) __u8 pdmc_addr[MAX_ADDR_LEN]; This is not the first case in the UAPI where the numeric value of MAX_ADDR_LEN is used instead of symbolic one, uapi/linux/if_link.h already does the same: $ grep MAX_ADDR_LEN include/uapi/linux/if_link.h __u8 mac[32]; /* MAX_ADDR_LEN */ There are no UAPI headers besides these two that use MAX_ADDR_LEN. Signed-off-by: Dmitry V. Levin Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/packet_diag.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/packet_diag.h b/include/uapi/linux/packet_diag.h index d08c63f3dd6f..0c5d5dd61b6a 100644 --- a/include/uapi/linux/packet_diag.h +++ b/include/uapi/linux/packet_diag.h @@ -64,7 +64,7 @@ struct packet_diag_mclist { __u32 pdmc_count; __u16 pdmc_type; __u16 pdmc_alen; - __u8 pdmc_addr[MAX_ADDR_LEN]; + __u8 pdmc_addr[32]; /* MAX_ADDR_LEN */ }; struct packet_diag_ring { From 47c8dc47c0080c86c9aba36f58f6fb4a7bf84c0c Mon Sep 17 00:00:00 2001 From: Etienne Noss Date: Fri, 10 Mar 2017 16:55:32 +0100 Subject: [PATCH 292/357] act_connmark: avoid crashing on malformed nlattrs with null parms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 52491c7607c5527138095edf44c53169dc1ddb82 ] tcf_connmark_init does not check in its configuration if TCA_CONNMARK_PARMS is set, resulting in a null pointer dereference when trying to access it. [501099.043007] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 [501099.043039] IP: [] tcf_connmark_init+0x8b/0x180 [act_connmark] ... [501099.044334] Call Trace: [501099.044345] [] ? tcf_action_init_1+0x198/0x1b0 [501099.044363] [] ? tcf_action_init+0xb0/0x120 [501099.044380] [] ? tcf_exts_validate+0xc4/0x110 [501099.044398] [] ? u32_set_parms+0xa7/0x270 [cls_u32] [501099.044417] [] ? u32_change+0x680/0x87b [cls_u32] [501099.044436] [] ? tc_ctl_tfilter+0x4dd/0x8a0 [501099.044454] [] ? security_capable+0x41/0x60 [501099.044471] [] ? rtnetlink_rcv_msg+0xe1/0x220 [501099.044490] [] ? rtnl_newlink+0x870/0x870 [501099.044507] [] ? netlink_rcv_skb+0xa1/0xc0 [501099.044524] [] ? rtnetlink_rcv+0x24/0x30 [501099.044541] [] ? netlink_unicast+0x184/0x230 [501099.044558] [] ? netlink_sendmsg+0x2f8/0x3b0 [501099.044576] [] ? sock_sendmsg+0x30/0x40 [501099.044592] [] ? SYSC_sendto+0xd3/0x150 [501099.044608] [] ? __do_page_fault+0x2d1/0x510 [501099.044626] [] ? system_call_fast_compare_end+0xc/0x9b Fixes: 22a5dc0e5e3e ("net: sched: Introduce connmark action") Signed-off-by: Étienne Noss Signed-off-by: Victorien Molle Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_connmark.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index eae07a2e774d..1191179c0341 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -113,6 +113,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, if (ret < 0) return ret; + if (!tb[TCA_CONNMARK_PARMS]) + return -EINVAL; + parm = nla_data(tb[TCA_CONNMARK_PARMS]); if (!tcf_hash_check(tn, parm->index, a, bind)) { From b61206e253020adbfde31f662ec7b5ea8b54935f Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 10 Mar 2017 09:46:15 -0800 Subject: [PATCH 293/357] mpls: Send route delete notifications when router module is unloaded [ Upstream commit e37791ec1ad785b59022ae211f63a16189bacebf ] When the mpls_router module is unloaded, mpls routes are deleted but notifications are not sent to userspace leaving userspace caches out of sync. Add the call to mpls_notify_route in mpls_net_exit as routes are freed. Fixes: 0189197f44160 ("mpls: Basic routing support") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mpls/af_mpls.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 5b77377e5a15..2a21ff14c07d 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1696,6 +1696,7 @@ static void mpls_net_exit(struct net *net) for (index = 0; index < platform_labels; index++) { struct mpls_route *rt = rtnl_dereference(platform_label[index]); RCU_INIT_POINTER(platform_label[index], NULL); + mpls_notify_route(net, index, rt, NULL, NULL); mpls_rt_free(rt); } rtnl_unlock(); From 87c0286a07f93edc874c861a9159e586fc51ee6b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 10 Mar 2017 14:11:39 -0800 Subject: [PATCH 294/357] mpls: Do not decrement alive counter for unregister events [ Upstream commit 79099aab38c8f5c746748b066ae74ba984fe2cc8 ] Multipath routes can be rendered usesless when a device in one of the paths is deleted. For example: $ ip -f mpls ro ls 100 nexthop as to 200 via inet 172.16.2.2 dev virt12 nexthop as to 300 via inet 172.16.3.2 dev br0 101 nexthop as to 201 via inet6 2000:2::2 dev virt12 nexthop as to 301 via inet6 2000:3::2 dev br0 $ ip li del br0 When br0 is deleted the other hop is not considered in mpls_select_multipath because of the alive check -- rt_nhn_alive is 0. rt_nhn_alive is decremented once in mpls_ifdown when the device is taken down (NETDEV_DOWN) and again when it is deleted (NETDEV_UNREGISTER). For a 2 hop route, deleting one device drops the alive count to 0. Since devices are taken down before unregistering, the decrement on NETDEV_UNREGISTER is redundant. Fixes: c89359a42e2a4 ("mpls: support for dead routes") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mpls/af_mpls.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 2a21ff14c07d..1309e2c34764 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -956,7 +956,8 @@ static void mpls_ifdown(struct net_device *dev, int event) /* fall through */ case NETDEV_CHANGE: nh->nh_flags |= RTNH_F_LINKDOWN; - ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1; + if (event != NETDEV_UNREGISTER) + ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1; break; } if (event == NETDEV_UNREGISTER) From 4a8d3bb73a821e1923ba7157b3bb364e5aeab0c9 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 13 Mar 2017 13:28:09 +0100 Subject: [PATCH 295/357] ipv6: make ECMP route replacement less greedy [ Upstream commit 67e194007be08d071294456274dd53e0a04fdf90 ] Commit 27596472473a ("ipv6: fix ECMP route replacement") introduced a loop that removes all siblings of an ECMP route that is being replaced. However, this loop doesn't stop when it has replaced siblings, and keeps removing other routes with a higher metric. We also end up triggering the WARN_ON after the loop, because after this nsiblings < 0. Instead, stop the loop when we have taken care of all routes with the same metric as the route being replaced. Reproducer: =========== #!/bin/sh ip netns add ns1 ip netns add ns2 ip -net ns1 link set lo up for x in 0 1 2 ; do ip link add veth$x netns ns2 type veth peer name eth$x netns ns1 ip -net ns1 link set eth$x up ip -net ns2 link set veth$x up done ip -net ns1 -6 r a 2000::/64 nexthop via fe80::0 dev eth0 \ nexthop via fe80::1 dev eth1 nexthop via fe80::2 dev eth2 ip -net ns1 -6 r a 2000::/64 via fe80::42 dev eth0 metric 256 ip -net ns1 -6 r a 2000::/64 via fe80::43 dev eth0 metric 2048 echo "before replace, 3 routes" ip -net ns1 -6 r | grep -v '^fe80\|^ff00' echo ip -net ns1 -6 r c 2000::/64 nexthop via fe80::4 dev eth0 \ nexthop via fe80::5 dev eth1 nexthop via fe80::6 dev eth2 echo "after replace, only 2 routes, metric 2048 is gone" ip -net ns1 -6 r | grep -v '^fe80\|^ff00' Fixes: 27596472473a ("ipv6: fix ECMP route replacement") Signed-off-by: Sabrina Dubroca Acked-by: Nicolas Dichtel Reviewed-by: Xin Long Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_fib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index ef5485204522..8c88a37392d0 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -908,6 +908,8 @@ add: ins = &rt->dst.rt6_next; iter = *ins; while (iter) { + if (iter->rt6i_metric > rt->rt6i_metric) + break; if (rt6_qualify_for_ecmp(iter)) { *ins = iter->dst.rt6_next; fib6_purge_rt(iter, fn, info->nl_net); From 683100ed45761d56a5d2b3d79919ecdd12f8cbba Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 13 Mar 2017 16:24:28 +0100 Subject: [PATCH 296/357] ipv6: avoid write to a possibly cloned skb [ Upstream commit 79e49503efe53a8c51d8b695bedc8a346c5e4a87 ] ip6_fragment, in case skb has a fraglist, checks if the skb is cloned. If it is, it will move to the 'slow path' and allocates new skbs for each fragment. However, right before entering the slowpath loop, it updates the nexthdr value of the last ipv6 extension header to NEXTHDR_FRAGMENT, to account for the fragment header that will be inserted in the new ipv6-fragment skbs. In case original skb is cloned this munges nexthdr value of another skb. Avoid this by doing the nexthdr update for each of the new fragment skbs separately. This was observed with tcpdump on a bridge device where netfilter ipv6 reassembly is active: tcpdump shows malformed fragment headers as the l4 header (icmpv6, tcp, etc). is decoded as a fragment header. Cc: Hannes Frederic Sowa Reported-by: Andreas Karis Signed-off-by: Florian Westphal Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_output.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 9a87bfb2ec16..e27b8fdba5d2 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -757,13 +757,14 @@ slow_path: * Fragment the datagram. */ - *prevhdr = NEXTHDR_FRAGMENT; troom = rt->dst.dev->needed_tailroom; /* * Keep copying data until we run out. */ while (left > 0) { + u8 *fragnexthdr_offset; + len = left; /* IF: it doesn't fit, use 'mtu' - the data space left */ if (len > mtu) @@ -808,6 +809,10 @@ slow_path: */ skb_copy_from_linear_data(skb, skb_network_header(frag), hlen); + fragnexthdr_offset = skb_network_header(frag); + fragnexthdr_offset += prevhdr - skb_network_header(skb); + *fragnexthdr_offset = NEXTHDR_FRAGMENT; + /* * Build fragment header. */ From 9bce26f224d87cd454ba567eb2e01a6b0252c052 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 13 Mar 2017 17:38:17 +0100 Subject: [PATCH 297/357] bridge: drop netfilter fake rtable unconditionally [ Upstream commit a13b2082ece95247779b9995c4e91b4246bed023 ] Andreas reports kernel oops during rmmod of the br_netfilter module. Hannes debugged the oops down to a NULL rt6info->rt6i_indev. Problem is that br_netfilter has the nasty concept of adding a fake rtable to skb->dst; this happens in a br_netfilter prerouting hook. A second hook (in bridge LOCAL_IN) is supposed to remove these again before the skb is handed up the stack. However, on module unload hooks get unregistered which means an skb could traverse the prerouting hook that attaches the fake_rtable, while the 'fake rtable remove' hook gets removed from the hooklist immediately after. Fixes: 34666d467cbf1e2e3c7 ("netfilter: bridge: move br_netfilter out of the core") Reported-by: Andreas Karis Debugged-by: Hannes Frederic Sowa Signed-off-by: Florian Westphal Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_input.c | 1 + net/bridge/br_netfilter_hooks.c | 21 --------------------- 2 files changed, 1 insertion(+), 21 deletions(-) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 855b72fbe1da..267b46af407f 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -29,6 +29,7 @@ EXPORT_SYMBOL(br_should_route_hook); static int br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) { + br_drop_fake_rtable(skb); return netif_receive_skb(skb); } diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 7fbdbae58e65..aa1df1a10dd7 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -521,21 +521,6 @@ static unsigned int br_nf_pre_routing(void *priv, } -/* PF_BRIDGE/LOCAL_IN ************************************************/ -/* The packet is locally destined, which requires a real - * dst_entry, so detach the fake one. On the way up, the - * packet would pass through PRE_ROUTING again (which already - * took place when the packet entered the bridge), but we - * register an IPv4 PRE_ROUTING 'sabotage' hook that will - * prevent this from happening. */ -static unsigned int br_nf_local_in(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - br_drop_fake_rtable(skb); - return NF_ACCEPT; -} - /* PF_BRIDGE/FORWARD *************************************************/ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -905,12 +890,6 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = { .hooknum = NF_BR_PRE_ROUTING, .priority = NF_BR_PRI_BRNF, }, - { - .hook = br_nf_local_in, - .pf = NFPROTO_BRIDGE, - .hooknum = NF_BR_LOCAL_IN, - .priority = NF_BR_PRI_BRNF, - }, { .hook = br_nf_forward_ip, .pf = NFPROTO_BRIDGE, From 98933eb36dd25cb6797768bafb9b67af84e80a70 Mon Sep 17 00:00:00 2001 From: Jon Maxwell Date: Fri, 10 Mar 2017 16:40:33 +1100 Subject: [PATCH 298/357] dccp/tcp: fix routing redirect race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 45caeaa5ac0b4b11784ac6f932c0ad4c6b67cda0 ] As Eric Dumazet pointed out this also needs to be fixed in IPv6. v2: Contains the IPv6 tcp/Ipv6 dccp patches as well. We have seen a few incidents lately where a dst_enty has been freed with a dangling TCP socket reference (sk->sk_dst_cache) pointing to that dst_entry. If the conditions/timings are right a crash then ensues when the freed dst_entry is referenced later on. A Common crashing back trace is: #8 [] page_fault at ffffffff8163e648 [exception RIP: __tcp_ack_snd_check+74] . . #9 [] tcp_rcv_established at ffffffff81580b64 #10 [] tcp_v4_do_rcv at ffffffff8158b54a #11 [] tcp_v4_rcv at ffffffff8158cd02 #12 [] ip_local_deliver_finish at ffffffff815668f4 #13 [] ip_local_deliver at ffffffff81566bd9 #14 [] ip_rcv_finish at ffffffff8156656d #15 [] ip_rcv at ffffffff81566f06 #16 [] __netif_receive_skb_core at ffffffff8152b3a2 #17 [] __netif_receive_skb at ffffffff8152b608 #18 [] netif_receive_skb at ffffffff8152b690 #19 [] vmxnet3_rq_rx_complete at ffffffffa015eeaf [vmxnet3] #20 [] vmxnet3_poll_rx_only at ffffffffa015f32a [vmxnet3] #21 [] net_rx_action at ffffffff8152bac2 #22 [] __do_softirq at ffffffff81084b4f #23 [] call_softirq at ffffffff8164845c #24 [] do_softirq at ffffffff81016fc5 #25 [] irq_exit at ffffffff81084ee5 #26 [] do_IRQ at ffffffff81648ff8 Of course it may happen with other NIC drivers as well. It's found the freed dst_entry here: 224 static bool tcp_in_quickack_mode(struct sock *sk)↩ 225 {↩ 226 ▹ const struct inet_connection_sock *icsk = inet_csk(sk);↩ 227 ▹ const struct dst_entry *dst = __sk_dst_get(sk);↩ 228 ↩ 229 ▹ return (dst && dst_metric(dst, RTAX_QUICKACK)) ||↩ 230 ▹ ▹ (icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong);↩ 231 }↩ But there are other backtraces attributed to the same freed dst_entry in netfilter code as well. All the vmcores showed 2 significant clues: - Remote hosts behind the default gateway had always been redirected to a different gateway. A rtable/dst_entry will be added for that host. Making more dst_entrys with lower reference counts. Making this more probable. - All vmcores showed a postitive LockDroppedIcmps value, e.g: LockDroppedIcmps 267 A closer look at the tcp_v4_err() handler revealed that do_redirect() will run regardless of whether user space has the socket locked. This can result in a race condition where the same dst_entry cached in sk->sk_dst_entry can be decremented twice for the same socket via: do_redirect()->__sk_dst_check()-> dst_release(). Which leads to the dst_entry being prematurely freed with another socket pointing to it via sk->sk_dst_cache and a subsequent crash. To fix this skip do_redirect() if usespace has the socket locked. Instead let the redirect take place later when user space does not have the socket locked. The dccp/IPv6 code is very similar in this respect, so fixing it there too. As Eric Garver pointed out the following commit now invalidates routes. Which can set the dst->obsolete flag so that ipv4_dst_check() returns null and triggers the dst_release(). Fixes: ceb3320610d6 ("ipv4: Kill routes during PMTU/redirect updates.") Cc: Eric Garver Cc: Hannes Sowa Signed-off-by: Jon Maxwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/ipv4.c | 3 ++- net/dccp/ipv6.c | 8 +++++--- net/ipv4/tcp_ipv4.c | 3 ++- net/ipv6/tcp_ipv6.c | 8 +++++--- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index edbe59d203ef..86b0933ecd45 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -289,7 +289,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) switch (type) { case ICMP_REDIRECT: - dccp_do_redirect(skb, sk); + if (!sock_owned_by_user(sk)) + dccp_do_redirect(skb, sk); goto out; case ICMP_SOURCE_QUENCH: /* Just silently ignore these. */ diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 7506c03a7db9..237d62c493e3 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -122,10 +122,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); if (type == NDISC_REDIRECT) { - struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + if (!sock_owned_by_user(sk)) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); - if (dst) - dst->ops->redirect(dst, sk, skb); + if (dst) + dst->ops->redirect(dst, sk, skb); + } goto out; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5bb809f12ba1..6988566dc72f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -421,7 +421,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) switch (type) { case ICMP_REDIRECT: - do_redirect(icmp_skb, sk); + if (!sock_owned_by_user(sk)) + do_redirect(icmp_skb, sk); goto out; case ICMP_SOURCE_QUENCH: /* Just silently ignore these. */ diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 667396536feb..b2e61a0e8d0a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -375,10 +375,12 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); if (type == NDISC_REDIRECT) { - struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + if (!sock_owned_by_user(sk)) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); - if (dst) - dst->ops->redirect(dst, sk, skb); + if (dst) + dst->ops->redirect(dst, sk, skb); + } goto out; } From beaa66cce55689076cacd3b6b290886d503eaa35 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 13 Mar 2017 00:00:26 +0100 Subject: [PATCH 299/357] tun: fix premature POLLOUT notification on tun devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b20e2d54789c6acbf6bd0efdbec2cf5fa4d90ef1 ] aszlig observed failing ssh tunnels (-w) during initialization since commit cc9da6cc4f56e0 ("ipv6: addrconf: use stable address generator for ARPHRD_NONE"). We already had reports that the mentioned commit breaks Juniper VPN connections. I can't clearly say that the Juniper VPN client has the same problem, but it is worth a try to hint to this patch. Because of the early generation of link local addresses, the kernel now can start asking for routers on the local subnet much earlier than usual. Those router solicitation packets arrive inside the ssh channels and should be transmitted to the tun fd before the configuration scripts might have upped the interface and made it ready for transmission. ssh polls on the interface and receives back a POLL_OUT. It tries to send the earily router solicitation packet to the tun interface. Unfortunately it hasn't been up'ed yet by config scripts, thus failing with -EIO. ssh doesn't retry again and considers the tun interface broken forever. Link: https://bugzilla.kernel.org/show_bug.cgi?id=121131 Fixes: cc9da6cc4f56 ("ipv6: addrconf: use stable address generator for ARPHRD_NONE") Cc: Bjørn Mork Reported-by: Valdis Kletnieks Cc: Valdis Kletnieks Reported-by: Jonas Lippuner Cc: Jonas Lippuner Reported-by: aszlig Cc: aszlig Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b31aca8146bb..a931b73393c8 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -819,7 +819,18 @@ static void tun_net_uninit(struct net_device *dev) /* Net device open. */ static int tun_net_open(struct net_device *dev) { + struct tun_struct *tun = netdev_priv(dev); + int i; + netif_tx_start_all_queues(dev); + + for (i = 0; i < tun->numqueues; i++) { + struct tun_file *tfile; + + tfile = rtnl_dereference(tun->tfiles[i]); + tfile->socket.sk->sk_write_space(tfile->socket.sk); + } + return 0; } @@ -1116,9 +1127,10 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait) if (!skb_array_empty(&tfile->tx_array)) mask |= POLLIN | POLLRDNORM; - if (sock_writeable(sk) || - (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && - sock_writeable(sk))) + if (tun->dev->flags & IFF_UP && + (sock_writeable(sk) || + (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && + sock_writeable(sk)))) mask |= POLLOUT | POLLWRNORM; if (tun->dev->reg_state != NETREG_REGISTERED) From 9e38375a4b1748946ab46a317def3046bb425f1e Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 13 Mar 2017 00:01:30 +0100 Subject: [PATCH 300/357] dccp: fix memory leak during tear-down of unsuccessful connection request [ Upstream commit 72ef9c4125c7b257e3a714d62d778ab46583d6a3 ] This patch fixes a memory leak, which happens if the connection request is not fulfilled between parsing the DCCP options and handling the SYN (because e.g. the backlog is full), because we forgot to free the list of ack vectors. Reported-by: Jianwen Ji Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/ccids/ccid2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index f053198e730c..5e3a7302f774 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -749,6 +749,7 @@ static void ccid2_hc_tx_exit(struct sock *sk) for (i = 0; i < hc->tx_seqbufc; i++) kfree(hc->tx_seqbuf[i]); hc->tx_seqbufc = 0; + dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks); } static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) From 1411707acb85c514c603f692327c98db48127900 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 18 Oct 2016 19:51:19 +0200 Subject: [PATCH 301/357] bpf: Detect identical PTR_TO_MAP_VALUE_OR_NULL registers [ Upstream commit 57a09bf0a416700676e77102c28f9cfcb48267e0 ] A BPF program is required to check the return register of a map_elem_lookup() call before accessing memory. The verifier keeps track of this by converting the type of the result register from PTR_TO_MAP_VALUE_OR_NULL to PTR_TO_MAP_VALUE after a conditional jump ensures safety. This check is currently exclusively performed for the result register 0. In the event the compiler reorders instructions, BPF_MOV64_REG instructions may be moved before the conditional jump which causes them to keep their type PTR_TO_MAP_VALUE_OR_NULL to which the verifier objects when the register is accessed: 0: (b7) r1 = 10 1: (7b) *(u64 *)(r10 -8) = r1 2: (bf) r2 = r10 3: (07) r2 += -8 4: (18) r1 = 0x59c00000 6: (85) call 1 7: (bf) r4 = r0 8: (15) if r0 == 0x0 goto pc+1 R0=map_value(ks=8,vs=8) R4=map_value_or_null(ks=8,vs=8) R10=fp 9: (7a) *(u64 *)(r4 +0) = 0 R4 invalid mem access 'map_value_or_null' This commit extends the verifier to keep track of all identical PTR_TO_MAP_VALUE_OR_NULL registers after a map_elem_lookup() by assigning them an ID and then marking them all when the conditional jump is observed. Signed-off-by: Thomas Graf Reviewed-by: Josef Bacik Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/bpf_verifier.h | 2 +- kernel/bpf/verifier.c | 61 ++++++++++++++++++++++++++---------- 2 files changed, 46 insertions(+), 17 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 6aaf425cebc3..7453c1281531 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -24,13 +24,13 @@ struct bpf_reg_state { */ s64 min_value; u64 max_value; + u32 id; union { /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ s64 imm; /* valid when type == PTR_TO_PACKET* */ struct { - u32 id; u16 off; u16 range; }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8199821f54cf..c428c9f85186 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -212,9 +212,10 @@ static void print_verifier_state(struct bpf_verifier_state *state) else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE || t == PTR_TO_MAP_VALUE_OR_NULL || t == PTR_TO_MAP_VALUE_ADJ) - verbose("(ks=%d,vs=%d)", + verbose("(ks=%d,vs=%d,id=%u)", reg->map_ptr->key_size, - reg->map_ptr->value_size); + reg->map_ptr->value_size, + reg->id); if (reg->min_value != BPF_REGISTER_MIN_RANGE) verbose(",min_value=%lld", (long long)reg->min_value); @@ -447,6 +448,7 @@ static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno) { BUG_ON(regno >= MAX_BPF_REG); regs[regno].type = UNKNOWN_VALUE; + regs[regno].id = 0; regs[regno].imm = 0; } @@ -1252,6 +1254,7 @@ static int check_call(struct bpf_verifier_env *env, int func_id) return -EINVAL; } regs[BPF_REG_0].map_ptr = meta.map_ptr; + regs[BPF_REG_0].id = ++env->id_gen; } else { verbose("unknown return type %d of func %d\n", fn->ret_type, func_id); @@ -1668,8 +1671,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) insn->src_reg); return -EACCES; } - regs[insn->dst_reg].type = UNKNOWN_VALUE; - regs[insn->dst_reg].map_ptr = NULL; + mark_reg_unknown_value(regs, insn->dst_reg); } } else { /* case: R = imm @@ -1931,6 +1933,38 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, check_reg_overflow(true_reg); } +static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id, + enum bpf_reg_type type) +{ + struct bpf_reg_state *reg = ®s[regno]; + + if (reg->type == PTR_TO_MAP_VALUE_OR_NULL && reg->id == id) { + reg->type = type; + if (type == UNKNOWN_VALUE) + mark_reg_unknown_value(regs, regno); + } +} + +/* The logic is similar to find_good_pkt_pointers(), both could eventually + * be folded together at some point. + */ +static void mark_map_regs(struct bpf_verifier_state *state, u32 regno, + enum bpf_reg_type type) +{ + struct bpf_reg_state *regs = state->regs; + int i; + + for (i = 0; i < MAX_BPF_REG; i++) + mark_map_reg(regs, i, regs[regno].id, type); + + for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { + if (state->stack_slot_type[i] != STACK_SPILL) + continue; + mark_map_reg(state->spilled_regs, i / BPF_REG_SIZE, + regs[regno].id, type); + } +} + static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx) { @@ -2018,18 +2052,13 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, if (BPF_SRC(insn->code) == BPF_K && insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) { - if (opcode == BPF_JEQ) { - /* next fallthrough insn can access memory via - * this register - */ - regs[insn->dst_reg].type = PTR_TO_MAP_VALUE; - /* branch targer cannot access it, since reg == 0 */ - mark_reg_unknown_value(other_branch->regs, - insn->dst_reg); - } else { - other_branch->regs[insn->dst_reg].type = PTR_TO_MAP_VALUE; - mark_reg_unknown_value(regs, insn->dst_reg); - } + /* Mark all identical map registers in each branch as either + * safe or unknown depending R == 0 or R != 0 conditional. + */ + mark_map_regs(this_branch, insn->dst_reg, + opcode == BPF_JEQ ? PTR_TO_MAP_VALUE : UNKNOWN_VALUE); + mark_map_regs(other_branch, insn->dst_reg, + opcode == BPF_JEQ ? UNKNOWN_VALUE : PTR_TO_MAP_VALUE); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && dst_reg->type == PTR_TO_PACKET && regs[insn->src_reg].type == PTR_TO_PACKET_END) { From b7f5aa1ca0bedbd109be7563f6a94c9a37714537 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 7 Dec 2016 10:57:59 -0800 Subject: [PATCH 302/357] bpf: fix state equivalence [ Upstream commit d2a4dd37f6b41fbcad76efbf63124eb3126c66fe ] Commmits 57a09bf0a416 ("bpf: Detect identical PTR_TO_MAP_VALUE_OR_NULL registers") and 484611357c19 ("bpf: allow access into map value arrays") by themselves are correct, but in combination they make state equivalence ignore 'id' field of the register state which can lead to accepting invalid program. Fixes: 57a09bf0a416 ("bpf: Detect identical PTR_TO_MAP_VALUE_OR_NULL registers") Fixes: 484611357c19 ("bpf: allow access into map value arrays") Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Thomas Graf Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/bpf_verifier.h | 14 +++++++------- kernel/bpf/verifier.c | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7453c1281531..a13b031dc6b8 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -18,13 +18,6 @@ struct bpf_reg_state { enum bpf_reg_type type; - /* - * Used to determine if any memory access using this register will - * result in a bad access. - */ - s64 min_value; - u64 max_value; - u32 id; union { /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ s64 imm; @@ -40,6 +33,13 @@ struct bpf_reg_state { */ struct bpf_map *map_ptr; }; + u32 id; + /* Used to determine if any memory access using this register will + * result in a bad access. These two fields must be last. + * See states_equal() + */ + s64 min_value; + u64 max_value; }; enum bpf_stack_slot_type { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c428c9f85186..6036d1e8c2a9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2498,7 +2498,7 @@ static bool states_equal(struct bpf_verifier_env *env, * we didn't do a variable access into a map then we are a-ok. */ if (!varlen_map_access && - rold->type == rcur->type && rold->imm == rcur->imm) + memcmp(rold, rcur, offsetofend(struct bpf_reg_state, id)) == 0) continue; /* If we didn't map access then again we don't care about the From 1889d6d9b5e767c7070ad31f93371dcb05b0cea4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 15 Dec 2016 01:30:06 +0100 Subject: [PATCH 303/357] bpf: fix regression on verifier pruning wrt map lookups [ Upstream commit a08dd0da5307ba01295c8383923e51e7997c3576 ] Commit 57a09bf0a416 ("bpf: Detect identical PTR_TO_MAP_VALUE_OR_NULL registers") introduced a regression where existing programs stopped loading due to reaching the verifier's maximum complexity limit, whereas prior to this commit they were loading just fine; the affected program has roughly 2k instructions. What was found is that state pruning couldn't be performed effectively anymore due to mismatches of the verifier's register state, in particular in the id tracking. It doesn't mean that 57a09bf0a416 is incorrect per se, but rather that verifier needs to perform a lot more work for the same program with regards to involved map lookups. Since commit 57a09bf0a416 is only about tracking registers with type PTR_TO_MAP_VALUE_OR_NULL, the id is only needed to follow registers until they are promoted through pattern matching with a NULL check to either PTR_TO_MAP_VALUE or UNKNOWN_VALUE type. After that point, the id becomes irrelevant for the transitioned types. For UNKNOWN_VALUE, id is already reset to 0 via mark_reg_unknown_value(), but not so for PTR_TO_MAP_VALUE where id is becoming stale. It's even transferred further into other types that don't make use of it. Among others, one example is where UNKNOWN_VALUE is set on function call return with RET_INTEGER return type. states_equal() will then fall through the memcmp() on register state; note that the second memcmp() uses offsetofend(), so the id is part of that since d2a4dd37f6b4 ("bpf: fix state equivalence"). But the bisect pointed already to 57a09bf0a416, where we really reach beyond complexity limit. What I found was that states_equal() often failed in this case due to id mismatches in spilled regs with registers in type PTR_TO_MAP_VALUE. Unlike non-spilled regs, spilled regs just perform a memcmp() on their reg state and don't have any other optimizations in place, therefore also id was relevant in this case for making a pruning decision. We can safely reset id to 0 as well when converting to PTR_TO_MAP_VALUE. For the affected program, it resulted in a ~17 fold reduction of complexity and let the program load fine again. Selftest suite also runs fine. The only other place where env->id_gen is used currently is through direct packet access, but for these cases id is long living, thus a different scenario. Also, the current logic in mark_map_regs() is not fully correct when marking NULL branch with UNKNOWN_VALUE. We need to cache the destination reg's id in any case. Otherwise, once we marked that reg as UNKNOWN_VALUE, it's id is reset and any subsequent registers that hold the original id and are of type PTR_TO_MAP_VALUE_OR_NULL won't be marked UNKNOWN_VALUE anymore, since mark_map_reg() reuses the uncached regs[regno].id that was just overridden. Note, we don't need to cache it outside of mark_map_regs(), since it's called once on this_branch and the other time on other_branch, which are both two independent verifier states. A test case for this is added here, too. Fixes: 57a09bf0a416 ("bpf: Detect identical PTR_TO_MAP_VALUE_OR_NULL registers") Signed-off-by: Daniel Borkmann Acked-by: Thomas Graf Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6036d1e8c2a9..a967ae8a5b8c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1940,6 +1940,11 @@ static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id, if (reg->type == PTR_TO_MAP_VALUE_OR_NULL && reg->id == id) { reg->type = type; + /* We don't need id from this point onwards anymore, thus we + * should better reset it, so that state pruning has chances + * to take effect. + */ + reg->id = 0; if (type == UNKNOWN_VALUE) mark_reg_unknown_value(regs, regno); } @@ -1952,16 +1957,16 @@ static void mark_map_regs(struct bpf_verifier_state *state, u32 regno, enum bpf_reg_type type) { struct bpf_reg_state *regs = state->regs; + u32 id = regs[regno].id; int i; for (i = 0; i < MAX_BPF_REG; i++) - mark_map_reg(regs, i, regs[regno].id, type); + mark_map_reg(regs, i, id, type); for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { if (state->stack_slot_type[i] != STACK_SPILL) continue; - mark_map_reg(state->spilled_regs, i / BPF_REG_SIZE, - regs[regno].id, type); + mark_map_reg(state->spilled_regs, i / BPF_REG_SIZE, id, type); } } From 0e0f1d6fdb353fc886ee99d646c561e4ad3d4ebc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 18 Dec 2016 01:52:59 +0100 Subject: [PATCH 304/357] bpf: fix mark_reg_unknown_value for spilled regs on map value marking [ Upstream commit 6760bf2ddde8ad64f8205a651223a93de3a35494 ] Martin reported a verifier issue that hit the BUG_ON() for his test case in the mark_reg_unknown_value() function: [ 202.861380] kernel BUG at kernel/bpf/verifier.c:467! [...] [ 203.291109] Call Trace: [ 203.296501] [] mark_map_reg+0x45/0x50 [ 203.308225] [] mark_map_regs+0x78/0x90 [ 203.320140] [] do_check+0x226d/0x2c90 [ 203.331865] [] bpf_check+0x48b/0x780 [ 203.343403] [] bpf_prog_load+0x27e/0x440 [ 203.355705] [] ? handle_mm_fault+0x11af/0x1230 [ 203.369158] [] ? security_capable+0x48/0x60 [ 203.382035] [] SyS_bpf+0x124/0x960 [ 203.393185] [] ? __do_page_fault+0x276/0x490 [ 203.406258] [] entry_SYSCALL_64_fastpath+0x13/0x94 This issue got uncovered after the fix in a08dd0da5307 ("bpf: fix regression on verifier pruning wrt map lookups"). The reason why it wasn't noticed before was, because as mentioned in a08dd0da5307, mark_map_regs() was doing the id matching incorrectly based on the uncached regs[regno].id. So, in the first loop, we walked all regs and as soon as we found regno == i, then this reg's id was cleared when calling mark_reg_unknown_value() thus that every subsequent register was probed against id of 0 (which, in combination with the PTR_TO_MAP_VALUE_OR_NULL type is an invalid condition that no other register state can hold), and therefore wasn't type transitioned such as in the spilled register case for the second loop. Now since that got fixed, it turned out that 57a09bf0a416 ("bpf: Detect identical PTR_TO_MAP_VALUE_OR_NULL registers") used mark_reg_unknown_value() incorrectly for the spilled regs, and thus hitting the BUG_ON() in some cases due to regno >= MAX_BPF_REG. Although spilled regs have the same type as the non-spilled regs for the verifier state, that is, struct bpf_reg_state, they are semantically different from the non-spilled regs. In other words, there can be up to 64 (MAX_BPF_STACK / BPF_REG_SIZE) spilled regs in the stack, for example, register R could have been spilled by the program to stack location X, Y, Z, and in mark_map_regs() we need to scan these stack slots of type STACK_SPILL for potential registers that we have to transition from PTR_TO_MAP_VALUE_OR_NULL. Therefore, depending on the location, the spilled_regs regno can be a lot higher than just MAX_BPF_REG's value since we operate on stack instead. The reset in mark_reg_unknown_value() itself is just fine, only that the BUG_ON() was inappropriate for this. Fix it by making a __mark_reg_unknown_value() version that can be called from mark_map_reg() generically; we know for the non-spilled case that the regno is always < MAX_BPF_REG anyway. Fixes: 57a09bf0a416 ("bpf: Detect identical PTR_TO_MAP_VALUE_OR_NULL registers") Reported-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a967ae8a5b8c..85d1c9423ccb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -444,14 +444,19 @@ static void init_reg_state(struct bpf_reg_state *regs) regs[BPF_REG_1].type = PTR_TO_CTX; } -static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno) +static void __mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno) { - BUG_ON(regno >= MAX_BPF_REG); regs[regno].type = UNKNOWN_VALUE; regs[regno].id = 0; regs[regno].imm = 0; } +static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno) +{ + BUG_ON(regno >= MAX_BPF_REG); + __mark_reg_unknown_value(regs, regno); +} + static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno) { regs[regno].min_value = BPF_REGISTER_MIN_RANGE; @@ -1946,7 +1951,7 @@ static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id, */ reg->id = 0; if (type == UNKNOWN_VALUE) - mark_reg_unknown_value(regs, regno); + __mark_reg_unknown_value(regs, regno); } } From 2382c1486c62a21d162f0d457c61cd87a2498ffd Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Wed, 4 Jan 2017 01:22:52 -0800 Subject: [PATCH 305/357] dmaengine: iota: ioat_alloc_chan_resources should not perform sleeping allocations. commit 21d25f6a4217e755906cb548b55ddab39d0e88b9 upstream. On a kernel with DEBUG_LOCKS, ioat_free_chan_resources triggers an in_interrupt() warning. With PROVE_LOCKING, it reports detecting a SOFTIRQ-safe to SOFTIRQ-unsafe lock ordering in the same code path. This is because dma_generic_alloc_coherent() checks if the GFP flags permit blocking. It allocates from different subsystems if blocking is permitted. The free path knows how to return the memory to the correct allocator. If GFP_KERNEL is specified then the alloc and free end up going through cma_alloc(), which uses mutexes. Given that ioat_free_chan_resources() can be called in interrupt context, ioat_alloc_chan_resources() must specify GFP_NOWAIT so that the allocations do not block and instead use an allocator that uses spinlocks. Signed-off-by: Krister Johansen Acked-by: Dave Jiang Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/ioat/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index 015f7110b96d..d235fbe2564f 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -691,7 +691,7 @@ static int ioat_alloc_chan_resources(struct dma_chan *c) /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ ioat_chan->completion = dma_pool_zalloc(ioat_chan->ioat_dma->completion_pool, - GFP_KERNEL, &ioat_chan->completion_dma); + GFP_NOWAIT, &ioat_chan->completion_dma); if (!ioat_chan->completion) return -ENOMEM; @@ -701,7 +701,7 @@ static int ioat_alloc_chan_resources(struct dma_chan *c) ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); order = IOAT_MAX_ORDER; - ring = ioat_alloc_ring(c, order, GFP_KERNEL); + ring = ioat_alloc_ring(c, order, GFP_NOWAIT); if (!ring) return -ENOMEM; From 4b40611a9b7e5d06626674af898608167ac3f727 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Fri, 17 Mar 2017 00:48:18 +0000 Subject: [PATCH 306/357] xen: do not re-use pirq number cached in pci device msi msg data [ Upstream commit c74fd80f2f41d05f350bb478151021f88551afe8 ] Revert the main part of commit: af42b8d12f8a ("xen: fix MSI setup and teardown for PV on HVM guests") That commit introduced reading the pci device's msi message data to see if a pirq was previously configured for the device's msi/msix, and re-use that pirq. At the time, that was the correct behavior. However, a later change to Qemu caused it to call into the Xen hypervisor to unmap all pirqs for a pci device, when the pci device disables its MSI/MSIX vectors; specifically the Qemu commit: c976437c7dba9c7444fb41df45468968aaa326ad ("qemu-xen: free all the pirqs for msi/msix when driver unload") Once Qemu added this pirq unmapping, it was no longer correct for the kernel to re-use the pirq number cached in the pci device msi message data. All Qemu releases since 2.1.0 contain the patch that unmaps the pirqs when the pci device disables its MSI/MSIX vectors. This bug is causing failures to initialize multiple NVMe controllers under Xen, because the NVMe driver sets up a single MSIX vector for each controller (concurrently), and then after using that to talk to the controller for some configuration data, it disables the single MSIX vector and re-configures all the MSIX vectors it needs. So the MSIX setup code tries to re-use the cached pirq from the first vector for each controller, but the hypervisor has already given away that pirq to another controller, and its initialization fails. This is discussed in more detail at: https://lists.xen.org/archives/html/xen-devel/2017-01/msg00447.html Fixes: af42b8d12f8a ("xen: fix MSI setup and teardown for PV on HVM guests") Signed-off-by: Dan Streetman Reviewed-by: Stefano Stabellini Acked-by: Konrad Rzeszutek Wilk Signed-off-by: Boris Ostrovsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/pci/xen.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index bedfab98077a..a00a6c07bb6f 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -234,23 +234,14 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) return 1; for_each_pci_msi_entry(msidesc, dev) { - __pci_read_msi_msg(msidesc, &msg); - pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | - ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff); - if (msg.data != XEN_PIRQ_MSI_DATA || - xen_irq_from_pirq(pirq) < 0) { - pirq = xen_allocate_pirq_msi(dev, msidesc); - if (pirq < 0) { - irq = -ENODEV; - goto error; - } - xen_msi_compose_msg(dev, pirq, &msg); - __pci_write_msi_msg(msidesc, &msg); - dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); - } else { - dev_dbg(&dev->dev, - "xen: msi already bound to pirq=%d\n", pirq); + pirq = xen_allocate_pirq_msi(dev, msidesc); + if (pirq < 0) { + irq = -ENODEV; + goto error; } + xen_msi_compose_msg(dev, pirq, &msg); + __pci_write_msi_msg(msidesc, &msg); + dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, (type == PCI_CAP_ID_MSI) ? nvec : 1, (type == PCI_CAP_ID_MSIX) ? From 15ffc931eeb94d9673c2a163828704a7cf4c1df5 Mon Sep 17 00:00:00 2001 From: Chris J Arges Date: Fri, 17 Mar 2017 00:48:19 +0000 Subject: [PATCH 307/357] igb: Workaround for igb i210 firmware issue [ Upstream commit 4e684f59d760a2c7c716bb60190783546e2d08a1 ] Sometimes firmware may not properly initialize I347AT4_PAGE_SELECT causing the probe of an igb i210 NIC to fail. This patch adds an addition zeroing of this register during igb_get_phy_id to workaround this issue. Thanks for Jochen Henneberg for the idea and original patch. Signed-off-by: Chris J Arges Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/igb/e1000_phy.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c index 5b54254aed4f..569ee25642b4 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.c +++ b/drivers/net/ethernet/intel/igb/e1000_phy.c @@ -77,6 +77,10 @@ s32 igb_get_phy_id(struct e1000_hw *hw) s32 ret_val = 0; u16 phy_id; + /* ensure PHY page selection to fix misconfigured i210 */ + if (hw->mac.type == e1000_i210) + phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0); + ret_val = phy->ops.read_reg(hw, PHY_ID1, &phy_id); if (ret_val) goto out; From 61229e62c1aa4d397aab9949325cab49acfd7dab Mon Sep 17 00:00:00 2001 From: Todd Fujinaka Date: Fri, 17 Mar 2017 00:48:19 +0000 Subject: [PATCH 308/357] igb: add i211 to i210 PHY workaround [ Upstream commit 5bc8c230e2a993b49244f9457499f17283da9ec7 ] i210 and i211 share the same PHY but have different PCI IDs. Don't forget i211 for any i210 workarounds. Signed-off-by: Todd Fujinaka Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/igb/e1000_phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c index 569ee25642b4..2788a5409023 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.c +++ b/drivers/net/ethernet/intel/igb/e1000_phy.c @@ -78,7 +78,7 @@ s32 igb_get_phy_id(struct e1000_hw *hw) u16 phy_id; /* ensure PHY page selection to fix misconfigured i210 */ - if (hw->mac.type == e1000_i210) + if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0); ret_val = phy->ops.read_reg(hw, PHY_ID1, &phy_id); From 0ad1d7660a57152ab6c7ae8ff5d02b6460b281a8 Mon Sep 17 00:00:00 2001 From: Michael Cyr Date: Fri, 17 Mar 2017 00:48:20 +0000 Subject: [PATCH 309/357] scsi: ibmvscsis: Issues from Dan Carpenter/Smatch [ Upstream commit 11950d70b52d2bc5e3580da8cd63909ef38d67db ] Signed-off-by: Michael Cyr Signed-off-by: Bryant G. Ly Tested-by: Steven Royer Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index 91dfd58b175d..292a3f3837c7 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -1746,14 +1746,7 @@ static long ibmvscsis_mad(struct scsi_info *vscsi, struct viosrp_crq *crq) pr_debug("mad: type %d\n", be32_to_cpu(mad->type)); - if (be16_to_cpu(mad->length) < 0) { - dev_err(&vscsi->dev, "mad: length is < 0\n"); - ibmvscsis_post_disconnect(vscsi, - ERR_DISCONNECT_RECONNECT, 0); - rc = SRP_VIOLATION; - } else { - rc = ibmvscsis_process_mad(vscsi, iue); - } + rc = ibmvscsis_process_mad(vscsi, iue); pr_debug("mad: status %hd, rc %ld\n", be16_to_cpu(mad->status), rc); @@ -2523,7 +2516,6 @@ static void ibmvscsis_parse_cmd(struct scsi_info *vscsi, dev_err(&vscsi->dev, "0x%llx: parsing SRP descriptor table failed.\n", srp->tag); goto fail; - return; } cmd->rsp.sol_not = srp->sol_not; @@ -3379,7 +3371,8 @@ static int ibmvscsis_probe(struct vio_dev *vdev, INIT_LIST_HEAD(&vscsi->waiting_rsp); INIT_LIST_HEAD(&vscsi->active_q); - snprintf(vscsi->tport.tport_name, 256, "%s", dev_name(&vdev->dev)); + snprintf(vscsi->tport.tport_name, IBMVSCSIS_NAMELEN, "%s", + dev_name(&vdev->dev)); pr_debug("probe tport_name: %s\n", vscsi->tport.tport_name); From 29022860df8efabdf0a2d356fd614d4dba2791a5 Mon Sep 17 00:00:00 2001 From: Michael Cyr Date: Fri, 17 Mar 2017 00:48:20 +0000 Subject: [PATCH 310/357] scsi: ibmvscsis: Return correct partition name/# to client [ Upstream commit 9c93cf03d4eb3dc58931ff7cac0af9c344fe5e0b ] Signed-off-by: Michael Cyr Signed-off-by: Bryant G. Ly Tested-by: Steven Royer Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index 292a3f3837c7..8d5eeb1d2338 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -3387,6 +3387,9 @@ static int ibmvscsis_probe(struct vio_dev *vdev, strncat(vscsi->eye, vdev->name, MAX_EYE); vscsi->dds.unit_id = vdev->unit_address; + strncpy(vscsi->dds.partition_name, partition_name, + sizeof(vscsi->dds.partition_name)); + vscsi->dds.partition_num = partition_number; spin_lock_bh(&ibmvscsis_dev_lock); list_add_tail(&vscsi->list, &ibmvscsis_dev_list); @@ -3603,7 +3606,7 @@ static int ibmvscsis_get_system_info(void) num = of_get_property(rootdn, "ibm,partition-no", NULL); if (num) - partition_number = *num; + partition_number = of_read_number(num, 1); of_node_put(rootdn); From 4d36f4859fa443e02e7834b9a5ce01b63f7af2c3 Mon Sep 17 00:00:00 2001 From: Michael Cyr Date: Fri, 17 Mar 2017 00:48:20 +0000 Subject: [PATCH 311/357] scsi: ibmvscsis: Clean up properly if target_submit_cmd/tmr fails [ Upstream commit 7435b32e2d2fb5da6c2ae9b9c8ce56d8a3cb3bc3 ] Signed-off-by: Michael Cyr Signed-off-by: Bryant G. Ly Tested-by: Steven Royer Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index 8d5eeb1d2338..e4cd8ffe7429 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -2552,6 +2552,10 @@ static void ibmvscsis_parse_cmd(struct scsi_info *vscsi, data_len, attr, dir, 0); if (rc) { dev_err(&vscsi->dev, "target_submit_cmd failed, rc %d\n", rc); + spin_lock_bh(&vscsi->intr_lock); + list_del(&cmd->list); + ibmvscsis_free_cmd_resources(vscsi, cmd); + spin_unlock_bh(&vscsi->intr_lock); goto fail; } return; @@ -2631,6 +2635,9 @@ static void ibmvscsis_parse_task(struct scsi_info *vscsi, if (rc) { dev_err(&vscsi->dev, "target_submit_tmr failed, rc %d\n", rc); + spin_lock_bh(&vscsi->intr_lock); + list_del(&cmd->list); + spin_unlock_bh(&vscsi->intr_lock); cmd->se_cmd.se_tmr_req->response = TMR_FUNCTION_REJECTED; } From 189491f81cf639d2d0292334580b9969c8a424df Mon Sep 17 00:00:00 2001 From: Michael Cyr Date: Fri, 17 Mar 2017 00:48:21 +0000 Subject: [PATCH 312/357] scsi: ibmvscsis: Rearrange functions for future patches [ Upstream commit 79fac9c9b74f4951c9ce82b22e714bcc34ae4a56 ] This patch reorders functions in a manner necessary for a follow-on patch. It also makes some minor styling changes (mostly removing extra spaces) and fixes some typos. There are no code changes in this patch, with one exception: due to the reordering of the functions, I needed to explicitly declare a function at the top of the file. However, this will be removed in the next patch, since the code requiring the predeclaration will be removed. Signed-off-by: Michael Cyr Signed-off-by: Bryant G. Ly Tested-by: Steven Royer Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 658 ++++++++++++----------- 1 file changed, 330 insertions(+), 328 deletions(-) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index e4cd8ffe7429..4feae43ee402 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -22,7 +22,7 @@ * ****************************************************************************/ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include @@ -62,6 +62,8 @@ static long ibmvscsis_parse_command(struct scsi_info *vscsi, static void ibmvscsis_adapter_idle(struct scsi_info *vscsi); +static void ibmvscsis_reset_queue(struct scsi_info *vscsi, uint new_state); + static void ibmvscsis_determine_resid(struct se_cmd *se_cmd, struct srp_rsp *rsp) { @@ -82,7 +84,7 @@ static void ibmvscsis_determine_resid(struct se_cmd *se_cmd, } } else if (se_cmd->se_cmd_flags & SCF_OVERFLOW_BIT) { if (se_cmd->data_direction == DMA_TO_DEVICE) { - /* residual data from an overflow write */ + /* residual data from an overflow write */ rsp->flags = SRP_RSP_FLAG_DOOVER; rsp->data_out_res_cnt = cpu_to_be32(residual_count); } else if (se_cmd->data_direction == DMA_FROM_DEVICE) { @@ -102,7 +104,7 @@ static void ibmvscsis_determine_resid(struct se_cmd *se_cmd, * and the function returns TRUE. * * EXECUTION ENVIRONMENT: - * Interrupt or Process environment + * Interrupt or Process environment */ static bool connection_broken(struct scsi_info *vscsi) { @@ -325,7 +327,7 @@ static struct viosrp_crq *ibmvscsis_cmd_q_dequeue(uint mask, } /** - * ibmvscsis_send_init_message() - send initialize message to the client + * ibmvscsis_send_init_message() - send initialize message to the client * @vscsi: Pointer to our adapter structure * @format: Which Init Message format to send * @@ -383,13 +385,13 @@ static long ibmvscsis_check_init_msg(struct scsi_info *vscsi, uint *format) vscsi->cmd_q.base_addr); if (crq) { *format = (uint)(crq->format); - rc = ERROR; + rc = ERROR; crq->valid = INVALIDATE_CMD_RESP_EL; dma_rmb(); } } else { *format = (uint)(crq->format); - rc = ERROR; + rc = ERROR; crq->valid = INVALIDATE_CMD_RESP_EL; dma_rmb(); } @@ -397,166 +399,6 @@ static long ibmvscsis_check_init_msg(struct scsi_info *vscsi, uint *format) return rc; } -/** - * ibmvscsis_establish_new_q() - Establish new CRQ queue - * @vscsi: Pointer to our adapter structure - * @new_state: New state being established after resetting the queue - * - * Must be called with interrupt lock held. - */ -static long ibmvscsis_establish_new_q(struct scsi_info *vscsi, uint new_state) -{ - long rc = ADAPT_SUCCESS; - uint format; - - vscsi->flags &= PRESERVE_FLAG_FIELDS; - vscsi->rsp_q_timer.timer_pops = 0; - vscsi->debit = 0; - vscsi->credit = 0; - - rc = vio_enable_interrupts(vscsi->dma_dev); - if (rc) { - pr_warn("reset_queue: failed to enable interrupts, rc %ld\n", - rc); - return rc; - } - - rc = ibmvscsis_check_init_msg(vscsi, &format); - if (rc) { - dev_err(&vscsi->dev, "reset_queue: check_init_msg failed, rc %ld\n", - rc); - return rc; - } - - if (format == UNUSED_FORMAT && new_state == WAIT_CONNECTION) { - rc = ibmvscsis_send_init_message(vscsi, INIT_MSG); - switch (rc) { - case H_SUCCESS: - case H_DROPPED: - case H_CLOSED: - rc = ADAPT_SUCCESS; - break; - - case H_PARAMETER: - case H_HARDWARE: - break; - - default: - vscsi->state = UNDEFINED; - rc = H_HARDWARE; - break; - } - } - - return rc; -} - -/** - * ibmvscsis_reset_queue() - Reset CRQ Queue - * @vscsi: Pointer to our adapter structure - * @new_state: New state to establish after resetting the queue - * - * This function calls h_free_q and then calls h_reg_q and does all - * of the bookkeeping to get us back to where we can communicate. - * - * Actually, we don't always call h_free_crq. A problem was discovered - * where one partition would close and reopen his queue, which would - * cause his partner to get a transport event, which would cause him to - * close and reopen his queue, which would cause the original partition - * to get a transport event, etc., etc. To prevent this, we don't - * actually close our queue if the client initiated the reset, (i.e. - * either we got a transport event or we have detected that the client's - * queue is gone) - * - * EXECUTION ENVIRONMENT: - * Process environment, called with interrupt lock held - */ -static void ibmvscsis_reset_queue(struct scsi_info *vscsi, uint new_state) -{ - int bytes; - long rc = ADAPT_SUCCESS; - - pr_debug("reset_queue: flags 0x%x\n", vscsi->flags); - - /* don't reset, the client did it for us */ - if (vscsi->flags & (CLIENT_FAILED | TRANS_EVENT)) { - vscsi->flags &= PRESERVE_FLAG_FIELDS; - vscsi->rsp_q_timer.timer_pops = 0; - vscsi->debit = 0; - vscsi->credit = 0; - vscsi->state = new_state; - vio_enable_interrupts(vscsi->dma_dev); - } else { - rc = ibmvscsis_free_command_q(vscsi); - if (rc == ADAPT_SUCCESS) { - vscsi->state = new_state; - - bytes = vscsi->cmd_q.size * PAGE_SIZE; - rc = h_reg_crq(vscsi->dds.unit_id, - vscsi->cmd_q.crq_token, bytes); - if (rc == H_CLOSED || rc == H_SUCCESS) { - rc = ibmvscsis_establish_new_q(vscsi, - new_state); - } - - if (rc != ADAPT_SUCCESS) { - pr_debug("reset_queue: reg_crq rc %ld\n", rc); - - vscsi->state = ERR_DISCONNECTED; - vscsi->flags |= RESPONSE_Q_DOWN; - ibmvscsis_free_command_q(vscsi); - } - } else { - vscsi->state = ERR_DISCONNECTED; - vscsi->flags |= RESPONSE_Q_DOWN; - } - } -} - -/** - * ibmvscsis_free_cmd_resources() - Free command resources - * @vscsi: Pointer to our adapter structure - * @cmd: Command which is not longer in use - * - * Must be called with interrupt lock held. - */ -static void ibmvscsis_free_cmd_resources(struct scsi_info *vscsi, - struct ibmvscsis_cmd *cmd) -{ - struct iu_entry *iue = cmd->iue; - - switch (cmd->type) { - case TASK_MANAGEMENT: - case SCSI_CDB: - /* - * When the queue goes down this value is cleared, so it - * cannot be cleared in this general purpose function. - */ - if (vscsi->debit) - vscsi->debit -= 1; - break; - case ADAPTER_MAD: - vscsi->flags &= ~PROCESSING_MAD; - break; - case UNSET_TYPE: - break; - default: - dev_err(&vscsi->dev, "free_cmd_resources unknown type %d\n", - cmd->type); - break; - } - - cmd->iue = NULL; - list_add_tail(&cmd->list, &vscsi->free_cmd); - srp_iu_put(iue); - - if (list_empty(&vscsi->active_q) && list_empty(&vscsi->schedule_q) && - list_empty(&vscsi->waiting_rsp) && (vscsi->flags & WAIT_FOR_IDLE)) { - vscsi->flags &= ~WAIT_FOR_IDLE; - complete(&vscsi->wait_idle); - } -} - /** * ibmvscsis_disconnect() - Helper function to disconnect * @work: Pointer to work_struct, gives access to our adapter structure @@ -590,7 +432,7 @@ static void ibmvscsis_disconnect(struct work_struct *work) * should transitition to the new state */ switch (vscsi->state) { - /* Should never be called while in this state. */ + /* Should never be called while in this state. */ case NO_QUEUE: /* * Can never transition from this state; @@ -806,6 +648,316 @@ static void ibmvscsis_post_disconnect(struct scsi_info *vscsi, uint new_state, vscsi->flags, vscsi->new_state); } +/** + * ibmvscsis_handle_init_compl_msg() - Respond to an Init Complete Message + * @vscsi: Pointer to our adapter structure + * + * Must be called with interrupt lock held. + */ +static long ibmvscsis_handle_init_compl_msg(struct scsi_info *vscsi) +{ + long rc = ADAPT_SUCCESS; + + switch (vscsi->state) { + case NO_QUEUE: + case ERR_DISCONNECT: + case ERR_DISCONNECT_RECONNECT: + case ERR_DISCONNECTED: + case UNCONFIGURING: + case UNDEFINED: + rc = ERROR; + break; + + case WAIT_CONNECTION: + vscsi->state = CONNECTED; + break; + + case WAIT_IDLE: + case SRP_PROCESSING: + case CONNECTED: + case WAIT_ENABLED: + case PART_UP_WAIT_ENAB: + default: + rc = ERROR; + dev_err(&vscsi->dev, "init_msg: invalid state %d to get init compl msg\n", + vscsi->state); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + break; + } + + return rc; +} + +/** + * ibmvscsis_handle_init_msg() - Respond to an Init Message + * @vscsi: Pointer to our adapter structure + * + * Must be called with interrupt lock held. + */ +static long ibmvscsis_handle_init_msg(struct scsi_info *vscsi) +{ + long rc = ADAPT_SUCCESS; + + switch (vscsi->state) { + case WAIT_ENABLED: + vscsi->state = PART_UP_WAIT_ENAB; + break; + + case WAIT_CONNECTION: + rc = ibmvscsis_send_init_message(vscsi, INIT_COMPLETE_MSG); + switch (rc) { + case H_SUCCESS: + vscsi->state = CONNECTED; + break; + + case H_PARAMETER: + dev_err(&vscsi->dev, "init_msg: failed to send, rc %ld\n", + rc); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT, 0); + break; + + case H_DROPPED: + dev_err(&vscsi->dev, "init_msg: failed to send, rc %ld\n", + rc); + rc = ERROR; + ibmvscsis_post_disconnect(vscsi, + ERR_DISCONNECT_RECONNECT, 0); + break; + + case H_CLOSED: + pr_warn("init_msg: failed to send, rc %ld\n", rc); + rc = 0; + break; + } + break; + + case UNDEFINED: + rc = ERROR; + break; + + case UNCONFIGURING: + break; + + case PART_UP_WAIT_ENAB: + case CONNECTED: + case SRP_PROCESSING: + case WAIT_IDLE: + case NO_QUEUE: + case ERR_DISCONNECT: + case ERR_DISCONNECT_RECONNECT: + case ERR_DISCONNECTED: + default: + rc = ERROR; + dev_err(&vscsi->dev, "init_msg: invalid state %d to get init msg\n", + vscsi->state); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + break; + } + + return rc; +} + +/** + * ibmvscsis_init_msg() - Respond to an init message + * @vscsi: Pointer to our adapter structure + * @crq: Pointer to CRQ element containing the Init Message + * + * EXECUTION ENVIRONMENT: + * Interrupt, interrupt lock held + */ +static long ibmvscsis_init_msg(struct scsi_info *vscsi, struct viosrp_crq *crq) +{ + long rc = ADAPT_SUCCESS; + + pr_debug("init_msg: state 0x%hx\n", vscsi->state); + + rc = h_vioctl(vscsi->dds.unit_id, H_GET_PARTNER_INFO, + (u64)vscsi->map_ioba | ((u64)PAGE_SIZE << 32), 0, 0, 0, + 0); + if (rc == H_SUCCESS) { + vscsi->client_data.partition_number = + be64_to_cpu(*(u64 *)vscsi->map_buf); + pr_debug("init_msg, part num %d\n", + vscsi->client_data.partition_number); + } else { + pr_debug("init_msg h_vioctl rc %ld\n", rc); + rc = ADAPT_SUCCESS; + } + + if (crq->format == INIT_MSG) { + rc = ibmvscsis_handle_init_msg(vscsi); + } else if (crq->format == INIT_COMPLETE_MSG) { + rc = ibmvscsis_handle_init_compl_msg(vscsi); + } else { + rc = ERROR; + dev_err(&vscsi->dev, "init_msg: invalid format %d\n", + (uint)crq->format); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + } + + return rc; +} + +/** + * ibmvscsis_establish_new_q() - Establish new CRQ queue + * @vscsi: Pointer to our adapter structure + * @new_state: New state being established after resetting the queue + * + * Must be called with interrupt lock held. + */ +static long ibmvscsis_establish_new_q(struct scsi_info *vscsi, uint new_state) +{ + long rc = ADAPT_SUCCESS; + uint format; + + vscsi->flags &= PRESERVE_FLAG_FIELDS; + vscsi->rsp_q_timer.timer_pops = 0; + vscsi->debit = 0; + vscsi->credit = 0; + + rc = vio_enable_interrupts(vscsi->dma_dev); + if (rc) { + pr_warn("reset_queue: failed to enable interrupts, rc %ld\n", + rc); + return rc; + } + + rc = ibmvscsis_check_init_msg(vscsi, &format); + if (rc) { + dev_err(&vscsi->dev, "reset_queue: check_init_msg failed, rc %ld\n", + rc); + return rc; + } + + if (format == UNUSED_FORMAT && new_state == WAIT_CONNECTION) { + rc = ibmvscsis_send_init_message(vscsi, INIT_MSG); + switch (rc) { + case H_SUCCESS: + case H_DROPPED: + case H_CLOSED: + rc = ADAPT_SUCCESS; + break; + + case H_PARAMETER: + case H_HARDWARE: + break; + + default: + vscsi->state = UNDEFINED; + rc = H_HARDWARE; + break; + } + } + + return rc; +} + +/** + * ibmvscsis_reset_queue() - Reset CRQ Queue + * @vscsi: Pointer to our adapter structure + * @new_state: New state to establish after resetting the queue + * + * This function calls h_free_q and then calls h_reg_q and does all + * of the bookkeeping to get us back to where we can communicate. + * + * Actually, we don't always call h_free_crq. A problem was discovered + * where one partition would close and reopen his queue, which would + * cause his partner to get a transport event, which would cause him to + * close and reopen his queue, which would cause the original partition + * to get a transport event, etc., etc. To prevent this, we don't + * actually close our queue if the client initiated the reset, (i.e. + * either we got a transport event or we have detected that the client's + * queue is gone) + * + * EXECUTION ENVIRONMENT: + * Process environment, called with interrupt lock held + */ +static void ibmvscsis_reset_queue(struct scsi_info *vscsi, uint new_state) +{ + int bytes; + long rc = ADAPT_SUCCESS; + + pr_debug("reset_queue: flags 0x%x\n", vscsi->flags); + + /* don't reset, the client did it for us */ + if (vscsi->flags & (CLIENT_FAILED | TRANS_EVENT)) { + vscsi->flags &= PRESERVE_FLAG_FIELDS; + vscsi->rsp_q_timer.timer_pops = 0; + vscsi->debit = 0; + vscsi->credit = 0; + vscsi->state = new_state; + vio_enable_interrupts(vscsi->dma_dev); + } else { + rc = ibmvscsis_free_command_q(vscsi); + if (rc == ADAPT_SUCCESS) { + vscsi->state = new_state; + + bytes = vscsi->cmd_q.size * PAGE_SIZE; + rc = h_reg_crq(vscsi->dds.unit_id, + vscsi->cmd_q.crq_token, bytes); + if (rc == H_CLOSED || rc == H_SUCCESS) { + rc = ibmvscsis_establish_new_q(vscsi, + new_state); + } + + if (rc != ADAPT_SUCCESS) { + pr_debug("reset_queue: reg_crq rc %ld\n", rc); + + vscsi->state = ERR_DISCONNECTED; + vscsi->flags |= RESPONSE_Q_DOWN; + ibmvscsis_free_command_q(vscsi); + } + } else { + vscsi->state = ERR_DISCONNECTED; + vscsi->flags |= RESPONSE_Q_DOWN; + } + } +} + +/** + * ibmvscsis_free_cmd_resources() - Free command resources + * @vscsi: Pointer to our adapter structure + * @cmd: Command which is not longer in use + * + * Must be called with interrupt lock held. + */ +static void ibmvscsis_free_cmd_resources(struct scsi_info *vscsi, + struct ibmvscsis_cmd *cmd) +{ + struct iu_entry *iue = cmd->iue; + + switch (cmd->type) { + case TASK_MANAGEMENT: + case SCSI_CDB: + /* + * When the queue goes down this value is cleared, so it + * cannot be cleared in this general purpose function. + */ + if (vscsi->debit) + vscsi->debit -= 1; + break; + case ADAPTER_MAD: + vscsi->flags &= ~PROCESSING_MAD; + break; + case UNSET_TYPE: + break; + default: + dev_err(&vscsi->dev, "free_cmd_resources unknown type %d\n", + cmd->type); + break; + } + + cmd->iue = NULL; + list_add_tail(&cmd->list, &vscsi->free_cmd); + srp_iu_put(iue); + + if (list_empty(&vscsi->active_q) && list_empty(&vscsi->schedule_q) && + list_empty(&vscsi->waiting_rsp) && (vscsi->flags & WAIT_FOR_IDLE)) { + vscsi->flags &= ~WAIT_FOR_IDLE; + complete(&vscsi->wait_idle); + } +} + /** * ibmvscsis_trans_event() - Handle a Transport Event * @vscsi: Pointer to our adapter structure @@ -896,7 +1048,7 @@ static long ibmvscsis_trans_event(struct scsi_info *vscsi, } } - rc = vscsi->flags & SCHEDULE_DISCONNECT; + rc = vscsi->flags & SCHEDULE_DISCONNECT; pr_debug("Leaving trans_event: flags 0x%x, state 0x%hx, rc %ld\n", vscsi->flags, vscsi->state, rc); @@ -1221,7 +1373,7 @@ static long ibmvscsis_copy_crq_packet(struct scsi_info *vscsi, * @iue: Information Unit containing the Adapter Info MAD request * * EXECUTION ENVIRONMENT: - * Interrupt adpater lock is held + * Interrupt adapter lock is held */ static long ibmvscsis_adapter_info(struct scsi_info *vscsi, struct iu_entry *iue) @@ -1692,7 +1844,7 @@ static void ibmvscsis_send_mad_resp(struct scsi_info *vscsi, * @crq: Pointer to the CRQ entry containing the MAD request * * EXECUTION ENVIRONMENT: - * Interrupt called with adapter lock held + * Interrupt, called with adapter lock held */ static long ibmvscsis_mad(struct scsi_info *vscsi, struct viosrp_crq *crq) { @@ -1858,7 +2010,7 @@ static long ibmvscsis_srp_login_rej(struct scsi_info *vscsi, break; case H_PERMISSION: if (connection_broken(vscsi)) - flag_bits = RESPONSE_Q_DOWN | CLIENT_FAILED; + flag_bits = RESPONSE_Q_DOWN | CLIENT_FAILED; dev_err(&vscsi->dev, "login_rej: error copying to client, rc %ld\n", rc); ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, @@ -2180,156 +2332,6 @@ static long ibmvscsis_ping_response(struct scsi_info *vscsi) return rc; } -/** - * ibmvscsis_handle_init_compl_msg() - Respond to an Init Complete Message - * @vscsi: Pointer to our adapter structure - * - * Must be called with interrupt lock held. - */ -static long ibmvscsis_handle_init_compl_msg(struct scsi_info *vscsi) -{ - long rc = ADAPT_SUCCESS; - - switch (vscsi->state) { - case NO_QUEUE: - case ERR_DISCONNECT: - case ERR_DISCONNECT_RECONNECT: - case ERR_DISCONNECTED: - case UNCONFIGURING: - case UNDEFINED: - rc = ERROR; - break; - - case WAIT_CONNECTION: - vscsi->state = CONNECTED; - break; - - case WAIT_IDLE: - case SRP_PROCESSING: - case CONNECTED: - case WAIT_ENABLED: - case PART_UP_WAIT_ENAB: - default: - rc = ERROR; - dev_err(&vscsi->dev, "init_msg: invalid state %d to get init compl msg\n", - vscsi->state); - ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); - break; - } - - return rc; -} - -/** - * ibmvscsis_handle_init_msg() - Respond to an Init Message - * @vscsi: Pointer to our adapter structure - * - * Must be called with interrupt lock held. - */ -static long ibmvscsis_handle_init_msg(struct scsi_info *vscsi) -{ - long rc = ADAPT_SUCCESS; - - switch (vscsi->state) { - case WAIT_ENABLED: - vscsi->state = PART_UP_WAIT_ENAB; - break; - - case WAIT_CONNECTION: - rc = ibmvscsis_send_init_message(vscsi, INIT_COMPLETE_MSG); - switch (rc) { - case H_SUCCESS: - vscsi->state = CONNECTED; - break; - - case H_PARAMETER: - dev_err(&vscsi->dev, "init_msg: failed to send, rc %ld\n", - rc); - ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT, 0); - break; - - case H_DROPPED: - dev_err(&vscsi->dev, "init_msg: failed to send, rc %ld\n", - rc); - rc = ERROR; - ibmvscsis_post_disconnect(vscsi, - ERR_DISCONNECT_RECONNECT, 0); - break; - - case H_CLOSED: - pr_warn("init_msg: failed to send, rc %ld\n", rc); - rc = 0; - break; - } - break; - - case UNDEFINED: - rc = ERROR; - break; - - case UNCONFIGURING: - break; - - case PART_UP_WAIT_ENAB: - case CONNECTED: - case SRP_PROCESSING: - case WAIT_IDLE: - case NO_QUEUE: - case ERR_DISCONNECT: - case ERR_DISCONNECT_RECONNECT: - case ERR_DISCONNECTED: - default: - rc = ERROR; - dev_err(&vscsi->dev, "init_msg: invalid state %d to get init msg\n", - vscsi->state); - ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); - break; - } - - return rc; -} - -/** - * ibmvscsis_init_msg() - Respond to an init message - * @vscsi: Pointer to our adapter structure - * @crq: Pointer to CRQ element containing the Init Message - * - * EXECUTION ENVIRONMENT: - * Interrupt, interrupt lock held - */ -static long ibmvscsis_init_msg(struct scsi_info *vscsi, struct viosrp_crq *crq) -{ - long rc = ADAPT_SUCCESS; - - pr_debug("init_msg: state 0x%hx\n", vscsi->state); - - rc = h_vioctl(vscsi->dds.unit_id, H_GET_PARTNER_INFO, - (u64)vscsi->map_ioba | ((u64)PAGE_SIZE << 32), 0, 0, 0, - 0); - if (rc == H_SUCCESS) { - vscsi->client_data.partition_number = - be64_to_cpu(*(u64 *)vscsi->map_buf); - pr_debug("init_msg, part num %d\n", - vscsi->client_data.partition_number); - } else { - pr_debug("init_msg h_vioctl rc %ld\n", rc); - rc = ADAPT_SUCCESS; - } - - if (crq->format == INIT_MSG) { - rc = ibmvscsis_handle_init_msg(vscsi); - } else if (crq->format == INIT_COMPLETE_MSG) { - rc = ibmvscsis_handle_init_compl_msg(vscsi); - } else { - rc = ERROR; - dev_err(&vscsi->dev, "init_msg: invalid format %d\n", - (uint)crq->format); - ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); - } - - return rc; -} - /** * ibmvscsis_parse_command() - Parse an element taken from the cmd rsp queue. * @vscsi: Pointer to our adapter structure @@ -2385,7 +2387,7 @@ static long ibmvscsis_parse_command(struct scsi_info *vscsi, break; case VALID_TRANS_EVENT: - rc = ibmvscsis_trans_event(vscsi, crq); + rc = ibmvscsis_trans_event(vscsi, crq); break; case VALID_INIT_MSG: @@ -3270,7 +3272,7 @@ static void ibmvscsis_handle_crq(unsigned long data) /* * if we are in a path where we are waiting for all pending commands * to complete because we received a transport event and anything in - * the command queue is for a new connection, do nothing + * the command queue is for a new connection, do nothing */ if (TARGET_STOP(vscsi)) { vio_enable_interrupts(vscsi->dma_dev); @@ -3314,7 +3316,7 @@ cmd_work: * everything but transport events on the queue * * need to decrement the queue index so we can - * look at the elment again + * look at the element again */ if (vscsi->cmd_q.index) vscsi->cmd_q.index -= 1; @@ -3988,10 +3990,10 @@ static struct attribute *ibmvscsis_dev_attrs[] = { ATTRIBUTE_GROUPS(ibmvscsis_dev); static struct class ibmvscsis_class = { - .name = "ibmvscsis", - .dev_release = ibmvscsis_dev_release, - .class_attrs = ibmvscsis_class_attrs, - .dev_groups = ibmvscsis_dev_groups, + .name = "ibmvscsis", + .dev_release = ibmvscsis_dev_release, + .class_attrs = ibmvscsis_class_attrs, + .dev_groups = ibmvscsis_dev_groups, }; static struct vio_device_id ibmvscsis_device_table[] = { From 94700877c25f64cc3660b0f72c0a8f2165f5c696 Mon Sep 17 00:00:00 2001 From: Michael Cyr Date: Fri, 17 Mar 2017 00:48:21 +0000 Subject: [PATCH 313/357] scsi: ibmvscsis: Synchronize cmds at tpg_enable_store time [ Upstream commit c9b3379f60a83288a5e2f8ea75476460978689b0 ] This patch changes the way the IBM vSCSI server driver manages its Command/Response Queue (CRQ). We used to register the CRQ with phyp at probe time. Now we wait until tpg_enable_store. Similarly, when tpg_enable_store is called to "disable" (i.e. the stored value is 0), we unregister the queue with phyp. One consquence to this is that we have no need for the PART_UP_WAIT_ENAB state, since we can't get an Init Message from the client in our CRQ if we're waiting to be enabled, since we haven't registered the queue yet. Signed-off-by: Michael Cyr Signed-off-by: Bryant G. Ly Tested-by: Steven Royer Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 226 ++++------------------- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h | 2 - 2 files changed, 39 insertions(+), 189 deletions(-) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index 4feae43ee402..504e593f2879 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -62,8 +62,6 @@ static long ibmvscsis_parse_command(struct scsi_info *vscsi, static void ibmvscsis_adapter_idle(struct scsi_info *vscsi); -static void ibmvscsis_reset_queue(struct scsi_info *vscsi, uint new_state); - static void ibmvscsis_determine_resid(struct se_cmd *se_cmd, struct srp_rsp *rsp) { @@ -418,7 +416,6 @@ static void ibmvscsis_disconnect(struct work_struct *work) proc_work); u16 new_state; bool wait_idle = false; - long rc = ADAPT_SUCCESS; spin_lock_bh(&vscsi->intr_lock); new_state = vscsi->new_state; @@ -471,30 +468,12 @@ static void ibmvscsis_disconnect(struct work_struct *work) vscsi->state = new_state; break; - /* - * If this is a transition into an error state. - * a client is attempting to establish a connection - * and has violated the RPA protocol. - * There can be nothing pending on the adapter although - * there can be requests in the command queue. - */ case WAIT_ENABLED: - case PART_UP_WAIT_ENAB: switch (new_state) { - case ERR_DISCONNECT: - vscsi->flags |= RESPONSE_Q_DOWN; - vscsi->state = new_state; - vscsi->flags &= ~(SCHEDULE_DISCONNECT | - DISCONNECT_SCHEDULED); - ibmvscsis_free_command_q(vscsi); - break; - case ERR_DISCONNECT_RECONNECT: - ibmvscsis_reset_queue(vscsi, WAIT_ENABLED); - break; - /* should never happen */ + case ERR_DISCONNECT: + case ERR_DISCONNECT_RECONNECT: case WAIT_IDLE: - rc = ERROR; dev_err(&vscsi->dev, "disconnect: invalid state %d for WAIT_IDLE\n", vscsi->state); break; @@ -631,7 +610,6 @@ static void ibmvscsis_post_disconnect(struct scsi_info *vscsi, uint new_state, break; case WAIT_ENABLED: - case PART_UP_WAIT_ENAB: case WAIT_IDLE: case WAIT_CONNECTION: case CONNECTED: @@ -676,7 +654,6 @@ static long ibmvscsis_handle_init_compl_msg(struct scsi_info *vscsi) case SRP_PROCESSING: case CONNECTED: case WAIT_ENABLED: - case PART_UP_WAIT_ENAB: default: rc = ERROR; dev_err(&vscsi->dev, "init_msg: invalid state %d to get init compl msg\n", @@ -699,10 +676,6 @@ static long ibmvscsis_handle_init_msg(struct scsi_info *vscsi) long rc = ADAPT_SUCCESS; switch (vscsi->state) { - case WAIT_ENABLED: - vscsi->state = PART_UP_WAIT_ENAB; - break; - case WAIT_CONNECTION: rc = ibmvscsis_send_init_message(vscsi, INIT_COMPLETE_MSG); switch (rc) { @@ -738,7 +711,7 @@ static long ibmvscsis_handle_init_msg(struct scsi_info *vscsi) case UNCONFIGURING: break; - case PART_UP_WAIT_ENAB: + case WAIT_ENABLED: case CONNECTED: case SRP_PROCESSING: case WAIT_IDLE: @@ -801,11 +774,10 @@ static long ibmvscsis_init_msg(struct scsi_info *vscsi, struct viosrp_crq *crq) /** * ibmvscsis_establish_new_q() - Establish new CRQ queue * @vscsi: Pointer to our adapter structure - * @new_state: New state being established after resetting the queue * * Must be called with interrupt lock held. */ -static long ibmvscsis_establish_new_q(struct scsi_info *vscsi, uint new_state) +static long ibmvscsis_establish_new_q(struct scsi_info *vscsi) { long rc = ADAPT_SUCCESS; uint format; @@ -817,19 +789,19 @@ static long ibmvscsis_establish_new_q(struct scsi_info *vscsi, uint new_state) rc = vio_enable_interrupts(vscsi->dma_dev); if (rc) { - pr_warn("reset_queue: failed to enable interrupts, rc %ld\n", + pr_warn("establish_new_q: failed to enable interrupts, rc %ld\n", rc); return rc; } rc = ibmvscsis_check_init_msg(vscsi, &format); if (rc) { - dev_err(&vscsi->dev, "reset_queue: check_init_msg failed, rc %ld\n", + dev_err(&vscsi->dev, "establish_new_q: check_init_msg failed, rc %ld\n", rc); return rc; } - if (format == UNUSED_FORMAT && new_state == WAIT_CONNECTION) { + if (format == UNUSED_FORMAT) { rc = ibmvscsis_send_init_message(vscsi, INIT_MSG); switch (rc) { case H_SUCCESS: @@ -847,6 +819,8 @@ static long ibmvscsis_establish_new_q(struct scsi_info *vscsi, uint new_state) rc = H_HARDWARE; break; } + } else if (format == INIT_MSG) { + rc = ibmvscsis_handle_init_msg(vscsi); } return rc; @@ -855,7 +829,6 @@ static long ibmvscsis_establish_new_q(struct scsi_info *vscsi, uint new_state) /** * ibmvscsis_reset_queue() - Reset CRQ Queue * @vscsi: Pointer to our adapter structure - * @new_state: New state to establish after resetting the queue * * This function calls h_free_q and then calls h_reg_q and does all * of the bookkeeping to get us back to where we can communicate. @@ -872,7 +845,7 @@ static long ibmvscsis_establish_new_q(struct scsi_info *vscsi, uint new_state) * EXECUTION ENVIRONMENT: * Process environment, called with interrupt lock held */ -static void ibmvscsis_reset_queue(struct scsi_info *vscsi, uint new_state) +static void ibmvscsis_reset_queue(struct scsi_info *vscsi) { int bytes; long rc = ADAPT_SUCCESS; @@ -885,19 +858,18 @@ static void ibmvscsis_reset_queue(struct scsi_info *vscsi, uint new_state) vscsi->rsp_q_timer.timer_pops = 0; vscsi->debit = 0; vscsi->credit = 0; - vscsi->state = new_state; + vscsi->state = WAIT_CONNECTION; vio_enable_interrupts(vscsi->dma_dev); } else { rc = ibmvscsis_free_command_q(vscsi); if (rc == ADAPT_SUCCESS) { - vscsi->state = new_state; + vscsi->state = WAIT_CONNECTION; bytes = vscsi->cmd_q.size * PAGE_SIZE; rc = h_reg_crq(vscsi->dds.unit_id, vscsi->cmd_q.crq_token, bytes); if (rc == H_CLOSED || rc == H_SUCCESS) { - rc = ibmvscsis_establish_new_q(vscsi, - new_state); + rc = ibmvscsis_establish_new_q(vscsi); } if (rc != ADAPT_SUCCESS) { @@ -1016,10 +988,6 @@ static long ibmvscsis_trans_event(struct scsi_info *vscsi, TRANS_EVENT)); break; - case PART_UP_WAIT_ENAB: - vscsi->state = WAIT_ENABLED; - break; - case SRP_PROCESSING: if ((vscsi->debit > 0) || !list_empty(&vscsi->schedule_q) || @@ -1220,15 +1188,18 @@ static void ibmvscsis_adapter_idle(struct scsi_info *vscsi) switch (vscsi->state) { case ERR_DISCONNECT_RECONNECT: - ibmvscsis_reset_queue(vscsi, WAIT_CONNECTION); + ibmvscsis_reset_queue(vscsi); pr_debug("adapter_idle, disc_rec: flags 0x%x\n", vscsi->flags); break; case ERR_DISCONNECT: ibmvscsis_free_command_q(vscsi); - vscsi->flags &= ~DISCONNECT_SCHEDULED; + vscsi->flags &= ~(SCHEDULE_DISCONNECT | DISCONNECT_SCHEDULED); vscsi->flags |= RESPONSE_Q_DOWN; - vscsi->state = ERR_DISCONNECTED; + if (vscsi->tport.enabled) + vscsi->state = ERR_DISCONNECTED; + else + vscsi->state = WAIT_ENABLED; pr_debug("adapter_idle, disc: flags 0x%x, state 0x%hx\n", vscsi->flags, vscsi->state); break; @@ -1773,8 +1744,8 @@ static void ibmvscsis_send_messages(struct scsi_info *vscsi) be64_to_cpu(msg_hi), be64_to_cpu(cmd->rsp.tag)); - pr_debug("send_messages: tag 0x%llx, rc %ld\n", - be64_to_cpu(cmd->rsp.tag), rc); + pr_debug("send_messages: cmd %p, tag 0x%llx, rc %ld\n", + cmd, be64_to_cpu(cmd->rsp.tag), rc); /* if all ok free up the command element resources */ if (rc == H_SUCCESS) { @@ -2787,36 +2758,6 @@ static irqreturn_t ibmvscsis_interrupt(int dummy, void *data) return IRQ_HANDLED; } -/** - * ibmvscsis_check_q() - Helper function to Check Init Message Valid - * @vscsi: Pointer to our adapter structure - * - * Checks if a initialize message was queued by the initiatior - * while the timing window was open. This function is called from - * probe after the CRQ is created and interrupts are enabled. - * It would only be used by adapters who wait for some event before - * completing the init handshake with the client. For ibmvscsi, this - * event is waiting for the port to be enabled. - * - * EXECUTION ENVIRONMENT: - * Process level only, interrupt lock held - */ -static long ibmvscsis_check_q(struct scsi_info *vscsi) -{ - uint format; - long rc; - - rc = ibmvscsis_check_init_msg(vscsi, &format); - if (rc) - ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); - else if (format == UNUSED_FORMAT) - vscsi->state = WAIT_ENABLED; - else - vscsi->state = PART_UP_WAIT_ENAB; - - return rc; -} - /** * ibmvscsis_enable_change_state() - Set new state based on enabled status * @vscsi: Pointer to our adapter structure @@ -2828,77 +2769,19 @@ static long ibmvscsis_check_q(struct scsi_info *vscsi) */ static long ibmvscsis_enable_change_state(struct scsi_info *vscsi) { + int bytes; long rc = ADAPT_SUCCESS; -handle_state_change: - switch (vscsi->state) { - case WAIT_ENABLED: - rc = ibmvscsis_send_init_message(vscsi, INIT_MSG); - switch (rc) { - case H_SUCCESS: - case H_DROPPED: - case H_CLOSED: - vscsi->state = WAIT_CONNECTION; - rc = ADAPT_SUCCESS; - break; + bytes = vscsi->cmd_q.size * PAGE_SIZE; + rc = h_reg_crq(vscsi->dds.unit_id, vscsi->cmd_q.crq_token, bytes); + if (rc == H_CLOSED || rc == H_SUCCESS) { + vscsi->state = WAIT_CONNECTION; + rc = ibmvscsis_establish_new_q(vscsi); + } - case H_PARAMETER: - break; - - case H_HARDWARE: - break; - - default: - vscsi->state = UNDEFINED; - rc = H_HARDWARE; - break; - } - break; - case PART_UP_WAIT_ENAB: - rc = ibmvscsis_send_init_message(vscsi, INIT_COMPLETE_MSG); - switch (rc) { - case H_SUCCESS: - vscsi->state = CONNECTED; - rc = ADAPT_SUCCESS; - break; - - case H_DROPPED: - case H_CLOSED: - vscsi->state = WAIT_ENABLED; - goto handle_state_change; - - case H_PARAMETER: - break; - - case H_HARDWARE: - break; - - default: - rc = H_HARDWARE; - break; - } - break; - - case WAIT_CONNECTION: - case WAIT_IDLE: - case SRP_PROCESSING: - case CONNECTED: - rc = ADAPT_SUCCESS; - break; - /* should not be able to get here */ - case UNCONFIGURING: - rc = ERROR; - vscsi->state = UNDEFINED; - break; - - /* driver should never allow this to happen */ - case ERR_DISCONNECT: - case ERR_DISCONNECT_RECONNECT: - default: - dev_err(&vscsi->dev, "in invalid state %d during enable_change_state\n", - vscsi->state); - rc = ADAPT_SUCCESS; - break; + if (rc != ADAPT_SUCCESS) { + vscsi->state = ERR_DISCONNECTED; + vscsi->flags |= RESPONSE_Q_DOWN; } return rc; @@ -2918,7 +2801,6 @@ handle_state_change: */ static long ibmvscsis_create_command_q(struct scsi_info *vscsi, int num_cmds) { - long rc = 0; int pages; struct vio_dev *vdev = vscsi->dma_dev; @@ -2942,22 +2824,7 @@ static long ibmvscsis_create_command_q(struct scsi_info *vscsi, int num_cmds) return -ENOMEM; } - rc = h_reg_crq(vscsi->dds.unit_id, vscsi->cmd_q.crq_token, PAGE_SIZE); - if (rc) { - if (rc == H_CLOSED) { - vscsi->state = WAIT_ENABLED; - rc = 0; - } else { - dma_unmap_single(&vdev->dev, vscsi->cmd_q.crq_token, - PAGE_SIZE, DMA_BIDIRECTIONAL); - free_page((unsigned long)vscsi->cmd_q.base_addr); - rc = -ENODEV; - } - } else { - vscsi->state = WAIT_ENABLED; - } - - return rc; + return 0; } /** @@ -3491,31 +3358,12 @@ static int ibmvscsis_probe(struct vio_dev *vdev, goto destroy_WQ; } - spin_lock_bh(&vscsi->intr_lock); - vio_enable_interrupts(vdev); - if (rc) { - dev_err(&vscsi->dev, "enabling interrupts failed, rc %d\n", rc); - rc = -ENODEV; - spin_unlock_bh(&vscsi->intr_lock); - goto free_irq; - } - - if (ibmvscsis_check_q(vscsi)) { - rc = ERROR; - dev_err(&vscsi->dev, "probe: check_q failed, rc %d\n", rc); - spin_unlock_bh(&vscsi->intr_lock); - goto disable_interrupt; - } - spin_unlock_bh(&vscsi->intr_lock); + vscsi->state = WAIT_ENABLED; dev_set_drvdata(&vdev->dev, vscsi); return 0; -disable_interrupt: - vio_disable_interrupts(vdev); -free_irq: - free_irq(vdev->irq, vscsi); destroy_WQ: destroy_workqueue(vscsi->work_q); unmap_buf: @@ -3909,18 +3757,22 @@ static ssize_t ibmvscsis_tpg_enable_store(struct config_item *item, } if (tmp) { - tport->enabled = true; spin_lock_bh(&vscsi->intr_lock); + tport->enabled = true; lrc = ibmvscsis_enable_change_state(vscsi); if (lrc) pr_err("enable_change_state failed, rc %ld state %d\n", lrc, vscsi->state); spin_unlock_bh(&vscsi->intr_lock); } else { + spin_lock_bh(&vscsi->intr_lock); tport->enabled = false; + /* This simulates the server going down */ + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT, 0); + spin_unlock_bh(&vscsi->intr_lock); } - pr_debug("tpg_enable_store, state %d\n", vscsi->state); + pr_debug("tpg_enable_store, tmp %ld, state %d\n", tmp, vscsi->state); return count; } diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h index 981a0c992b6c..17e0ef43c525 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h @@ -204,8 +204,6 @@ struct scsi_info { struct list_head waiting_rsp; #define NO_QUEUE 0x00 #define WAIT_ENABLED 0X01 - /* driver has received an initialize command */ -#define PART_UP_WAIT_ENAB 0x02 #define WAIT_CONNECTION 0x04 /* have established a connection */ #define CONNECTED 0x08 From 456be98b4e79a2ed0316bbc0e885c526cbe8afad Mon Sep 17 00:00:00 2001 From: Michael Cyr Date: Fri, 17 Mar 2017 00:48:22 +0000 Subject: [PATCH 314/357] scsi: ibmvscsis: Synchronize cmds at remove time [ Upstream commit 8bf11557d44d00562360d370de8aa70ba89aa0d5 ] This patch adds code to disconnect from the client, which will make sure any outstanding commands have been completed, before continuing on with the remove operation. Signed-off-by: Michael Cyr Signed-off-by: Bryant G. Ly Tested-by: Steven Royer Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 39 +++++++++++++++++++++--- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h | 3 ++ 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index 504e593f2879..c4fe95a25621 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -470,6 +470,18 @@ static void ibmvscsis_disconnect(struct work_struct *work) case WAIT_ENABLED: switch (new_state) { + case UNCONFIGURING: + vscsi->state = new_state; + vscsi->flags |= RESPONSE_Q_DOWN; + vscsi->flags &= ~(SCHEDULE_DISCONNECT | + DISCONNECT_SCHEDULED); + dma_rmb(); + if (vscsi->flags & CFG_SLEEPING) { + vscsi->flags &= ~CFG_SLEEPING; + complete(&vscsi->unconfig); + } + break; + /* should never happen */ case ERR_DISCONNECT: case ERR_DISCONNECT_RECONNECT: @@ -482,6 +494,13 @@ static void ibmvscsis_disconnect(struct work_struct *work) case WAIT_IDLE: switch (new_state) { + case UNCONFIGURING: + vscsi->flags |= RESPONSE_Q_DOWN; + vscsi->state = new_state; + vscsi->flags &= ~(SCHEDULE_DISCONNECT | + DISCONNECT_SCHEDULED); + ibmvscsis_free_command_q(vscsi); + break; case ERR_DISCONNECT: case ERR_DISCONNECT_RECONNECT: vscsi->state = new_state; @@ -1187,6 +1206,15 @@ static void ibmvscsis_adapter_idle(struct scsi_info *vscsi) free_qs = true; switch (vscsi->state) { + case UNCONFIGURING: + ibmvscsis_free_command_q(vscsi); + dma_rmb(); + isync(); + if (vscsi->flags & CFG_SLEEPING) { + vscsi->flags &= ~CFG_SLEEPING; + complete(&vscsi->unconfig); + } + break; case ERR_DISCONNECT_RECONNECT: ibmvscsis_reset_queue(vscsi); pr_debug("adapter_idle, disc_rec: flags 0x%x\n", vscsi->flags); @@ -3342,6 +3370,7 @@ static int ibmvscsis_probe(struct vio_dev *vdev, (unsigned long)vscsi); init_completion(&vscsi->wait_idle); + init_completion(&vscsi->unconfig); snprintf(wq_name, 24, "ibmvscsis%s", dev_name(&vdev->dev)); vscsi->work_q = create_workqueue(wq_name); @@ -3397,10 +3426,11 @@ static int ibmvscsis_remove(struct vio_dev *vdev) pr_debug("remove (%s)\n", dev_name(&vscsi->dma_dev->dev)); - /* - * TBD: Need to handle if there are commands on the waiting_rsp q - * Actually, can there still be cmds outstanding to tcm? - */ + spin_lock_bh(&vscsi->intr_lock); + ibmvscsis_post_disconnect(vscsi, UNCONFIGURING, 0); + vscsi->flags |= CFG_SLEEPING; + spin_unlock_bh(&vscsi->intr_lock); + wait_for_completion(&vscsi->unconfig); vio_disable_interrupts(vdev); free_irq(vdev->irq, vscsi); @@ -3409,7 +3439,6 @@ static int ibmvscsis_remove(struct vio_dev *vdev) DMA_BIDIRECTIONAL); kfree(vscsi->map_buf); tasklet_kill(&vscsi->work_task); - ibmvscsis_unregister_command_q(vscsi); ibmvscsis_destroy_command_q(vscsi); ibmvscsis_freetimer(vscsi); ibmvscsis_free_cmds(vscsi); diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h index 17e0ef43c525..98b0ca79a5c5 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h @@ -257,6 +257,8 @@ struct scsi_info { #define SCHEDULE_DISCONNECT 0x00400 /* disconnect handler is scheduled */ #define DISCONNECT_SCHEDULED 0x00800 + /* remove function is sleeping */ +#define CFG_SLEEPING 0x01000 u32 flags; /* adapter lock */ spinlock_t intr_lock; @@ -285,6 +287,7 @@ struct scsi_info { struct workqueue_struct *work_q; struct completion wait_idle; + struct completion unconfig; struct device dev; struct vio_dev *dma_dev; struct srp_target target; From 29d928785f5e76b5abfafe4a91cc1db4155cd677 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 17 Mar 2017 00:48:22 +0000 Subject: [PATCH 315/357] x86/hyperv: Handle unknown NMIs on one CPU when unknown_nmi_panic [ Upstream commit 59107e2f48831daedc46973ce4988605ab066de3 ] There is a feature in Hyper-V ('Debug-VM --InjectNonMaskableInterrupt') which injects NMI to the guest. We may want to crash the guest and do kdump on this NMI by enabling unknown_nmi_panic. To make kdump succeed we need to allow the kdump kernel to re-establish VMBus connection so it will see VMBus devices (storage, network,..). To properly unload VMBus making it possible to start over during kdump we need to do the following: - Send an 'unload' message to the hypervisor. This can be done on any CPU so we do this the crashing CPU. - Receive the 'unload finished' reply message. WS2012R2 delivers this message to the CPU which was used to establish VMBus connection during module load and this CPU may differ from the CPU sending 'unload'. Receiving a VMBus message means the following: - There is a per-CPU slot in memory for one message. This slot can in theory be accessed by any CPU. - We get an interrupt on the CPU when a message was placed into the slot. - When we read the message we need to clear the slot and signal the fact to the hypervisor. In case there are more messages to this CPU pending the hypervisor will deliver the next message. The signaling is done by writing to an MSR so this can only be done on the appropriate CPU. To avoid doing cross-CPU work on crash we have vmbus_wait_for_unload() function which checks message slots for all CPUs in a loop waiting for the 'unload finished' messages. However, there is an issue which arises when these conditions are met: - We're crashing on a CPU which is different from the one which was used to initially contact the hypervisor. - The CPU which was used for the initial contact is blocked with interrupts disabled and there is a message pending in the message slot. In this case we won't be able to read the 'unload finished' message on the crashing CPU. This is reproducible when we receive unknown NMIs on all CPUs simultaneously: the first CPU entering panic() will proceed to crash and all other CPUs will stop themselves with interrupts disabled. The suggested solution is to handle unknown NMIs for Hyper-V guests on the first CPU which gets them only. This will allow us to rely on VMBus interrupt handler being able to receive the 'unload finish' message in case it is delivered to a different CPU. The issue is not reproducible on WS2016 as Debug-VM delivers NMI to the boot CPU only, WS2012R2 and earlier Hyper-V versions are affected. Signed-off-by: Vitaly Kuznetsov Acked-by: K. Y. Srinivasan Cc: devel@linuxdriverproject.org Cc: Haiyang Zhang Link: http://lkml.kernel.org/r/20161202100720.28121-1-vkuznets@redhat.com Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mshyperv.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 8f44c5a50ab8..f228f74051b6 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -31,6 +31,7 @@ #include #include #include +#include struct ms_hyperv_info ms_hyperv; EXPORT_SYMBOL_GPL(ms_hyperv); @@ -158,6 +159,26 @@ static unsigned char hv_get_nmi_reason(void) return 0; } +#ifdef CONFIG_X86_LOCAL_APIC +/* + * Prior to WS2016 Debug-VM sends NMIs to all CPUs which makes + * it dificult to process CHANNELMSG_UNLOAD in case of crash. Handle + * unknown NMI on the first CPU which gets it. + */ +static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs) +{ + static atomic_t nmi_cpu = ATOMIC_INIT(-1); + + if (!unknown_nmi_panic) + return NMI_DONE; + + if (atomic_cmpxchg(&nmi_cpu, -1, raw_smp_processor_id()) != -1) + return NMI_HANDLED; + + return NMI_DONE; +} +#endif + static void __init ms_hyperv_init_platform(void) { /* @@ -183,6 +204,9 @@ static void __init ms_hyperv_init_platform(void) pr_info("HyperV: LAPIC Timer Frequency: %#x\n", lapic_timer_frequency); } + + register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST, + "hv_nmi_unknown"); #endif if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) From 6a5f3e664ac798b54448fc55e7e7c3a9fd1ee9d4 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 17 Mar 2017 00:48:22 +0000 Subject: [PATCH 316/357] PCI: Separate VF BAR updates from standard BAR updates [ Upstream commit 6ffa2489c51da77564a0881a73765ea2169f955d ] Previously pci_update_resource() used the same code path for updating standard BARs and VF BARs in SR-IOV capabilities. Split the VF BAR update into a new pci_iov_update_resource() internal interface, which makes it simpler to compute the BAR address (we can get rid of pci_resource_bar() and pci_iov_resource_bar()). This patch: - Renames pci_update_resource() to pci_std_update_resource(), - Adds pci_iov_update_resource(), - Makes pci_update_resource() a wrapper that calls the appropriate one, No functional change intended. Signed-off-by: Bjorn Helgaas Reviewed-by: Gavin Shan Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/iov.c | 50 +++++++++++++++++++++++++++++++++++++++++ drivers/pci/pci.h | 1 + drivers/pci/setup-res.c | 13 +++++++++-- 3 files changed, 62 insertions(+), 2 deletions(-) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index e30f05c8517f..39888cfe6b23 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -571,6 +571,56 @@ int pci_iov_resource_bar(struct pci_dev *dev, int resno) 4 * (resno - PCI_IOV_RESOURCES); } +/** + * pci_iov_update_resource - update a VF BAR + * @dev: the PCI device + * @resno: the resource number + * + * Update a VF BAR in the SR-IOV capability of a PF. + */ +void pci_iov_update_resource(struct pci_dev *dev, int resno) +{ + struct pci_sriov *iov = dev->is_physfn ? dev->sriov : NULL; + struct resource *res = dev->resource + resno; + int vf_bar = resno - PCI_IOV_RESOURCES; + struct pci_bus_region region; + u32 new; + int reg; + + /* + * The generic pci_restore_bars() path calls this for all devices, + * including VFs and non-SR-IOV devices. If this is not a PF, we + * have nothing to do. + */ + if (!iov) + return; + + /* + * Ignore unimplemented BARs, unused resource slots for 64-bit + * BARs, and non-movable resources, e.g., those described via + * Enhanced Allocation. + */ + if (!res->flags) + return; + + if (res->flags & IORESOURCE_UNSET) + return; + + if (res->flags & IORESOURCE_PCI_FIXED) + return; + + pcibios_resource_to_bus(dev->bus, ®ion, res); + new = region.start; + new |= res->flags & ~PCI_BASE_ADDRESS_MEM_MASK; + + reg = iov->pos + PCI_SRIOV_BAR + 4 * vf_bar; + pci_write_config_dword(dev, reg, new); + if (res->flags & IORESOURCE_MEM_64) { + new = region.start >> 16 >> 16; + pci_write_config_dword(dev, reg + 4, new); + } +} + resource_size_t __weak pcibios_iov_resource_alignment(struct pci_dev *dev, int resno) { diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 451856210e18..5bfcb922f7f7 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -290,6 +290,7 @@ static inline void pci_restore_ats_state(struct pci_dev *dev) int pci_iov_init(struct pci_dev *dev); void pci_iov_release(struct pci_dev *dev); int pci_iov_resource_bar(struct pci_dev *dev, int resno); +void pci_iov_update_resource(struct pci_dev *dev, int resno); resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno); void pci_restore_iov_state(struct pci_dev *dev); int pci_iov_bus_range(struct pci_bus *bus); diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 9526e341988b..695f32daf466 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -25,8 +25,7 @@ #include #include "pci.h" - -void pci_update_resource(struct pci_dev *dev, int resno) +static void pci_std_update_resource(struct pci_dev *dev, int resno) { struct pci_bus_region region; bool disable; @@ -110,6 +109,16 @@ void pci_update_resource(struct pci_dev *dev, int resno) pci_write_config_word(dev, PCI_COMMAND, cmd); } +void pci_update_resource(struct pci_dev *dev, int resno) +{ + if (resno <= PCI_ROM_RESOURCE) + pci_std_update_resource(dev, resno); +#ifdef CONFIG_PCI_IOV + else if (resno >= PCI_IOV_RESOURCES && resno <= PCI_IOV_RESOURCE_END) + pci_iov_update_resource(dev, resno); +#endif +} + int pci_claim_resource(struct pci_dev *dev, int resource) { struct resource *res = &dev->resource[resource]; From 7b65c3a84311026200b19eb86c9a76ff004402f8 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 17 Mar 2017 00:48:23 +0000 Subject: [PATCH 317/357] PCI: Remove pci_resource_bar() and pci_iov_resource_bar() [ Upstream commit 286c2378aaccc7343ebf17ec6cd86567659caf70 ] pci_std_update_resource() only deals with standard BARs, so we don't have to worry about the complications of VF BARs in an SR-IOV capability. Compute the BAR address inline and remove pci_resource_bar(). That makes pci_iov_resource_bar() unused, so remove that as well. Signed-off-by: Bjorn Helgaas Reviewed-by: Gavin Shan Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/iov.c | 18 ------------------ drivers/pci/pci.c | 30 ------------------------------ drivers/pci/pci.h | 6 ------ drivers/pci/setup-res.c | 13 +++++++------ 4 files changed, 7 insertions(+), 60 deletions(-) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 39888cfe6b23..3fb8d2a9fb96 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -553,24 +553,6 @@ void pci_iov_release(struct pci_dev *dev) sriov_release(dev); } -/** - * pci_iov_resource_bar - get position of the SR-IOV BAR - * @dev: the PCI device - * @resno: the resource number - * - * Returns position of the BAR encapsulated in the SR-IOV capability. - */ -int pci_iov_resource_bar(struct pci_dev *dev, int resno) -{ - if (resno < PCI_IOV_RESOURCES || resno > PCI_IOV_RESOURCE_END) - return 0; - - BUG_ON(!dev->is_physfn); - - return dev->sriov->pos + PCI_SRIOV_BAR + - 4 * (resno - PCI_IOV_RESOURCES); -} - /** * pci_iov_update_resource - update a VF BAR * @dev: the PCI device diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index eda6a7cf0e54..1fc2cf4b5ab6 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4835,36 +4835,6 @@ int pci_select_bars(struct pci_dev *dev, unsigned long flags) } EXPORT_SYMBOL(pci_select_bars); -/** - * pci_resource_bar - get position of the BAR associated with a resource - * @dev: the PCI device - * @resno: the resource number - * @type: the BAR type to be filled in - * - * Returns BAR position in config space, or 0 if the BAR is invalid. - */ -int pci_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type) -{ - int reg; - - if (resno < PCI_ROM_RESOURCE) { - *type = pci_bar_unknown; - return PCI_BASE_ADDRESS_0 + 4 * resno; - } else if (resno == PCI_ROM_RESOURCE) { - *type = pci_bar_mem32; - return dev->rom_base_reg; - } else if (resno < PCI_BRIDGE_RESOURCES) { - /* device specific resource */ - *type = pci_bar_unknown; - reg = pci_iov_resource_bar(dev, resno); - if (reg) - return reg; - } - - dev_err(&dev->dev, "BAR %d: invalid resource\n", resno); - return 0; -} - /* Some architectures require additional programming to enable VGA */ static arch_set_vga_state_t arch_set_vga_state; diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 5bfcb922f7f7..a5d37f6a9fb5 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -245,7 +245,6 @@ bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *pl, int pci_setup_device(struct pci_dev *dev); int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, struct resource *res, unsigned int reg); -int pci_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type); void pci_configure_ari(struct pci_dev *dev); void __pci_bus_size_bridges(struct pci_bus *bus, struct list_head *realloc_head); @@ -289,7 +288,6 @@ static inline void pci_restore_ats_state(struct pci_dev *dev) #ifdef CONFIG_PCI_IOV int pci_iov_init(struct pci_dev *dev); void pci_iov_release(struct pci_dev *dev); -int pci_iov_resource_bar(struct pci_dev *dev, int resno); void pci_iov_update_resource(struct pci_dev *dev, int resno); resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno); void pci_restore_iov_state(struct pci_dev *dev); @@ -304,10 +302,6 @@ static inline void pci_iov_release(struct pci_dev *dev) { } -static inline int pci_iov_resource_bar(struct pci_dev *dev, int resno) -{ - return 0; -} static inline void pci_restore_iov_state(struct pci_dev *dev) { } diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 695f32daf466..182d0f065a42 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -32,7 +32,6 @@ static void pci_std_update_resource(struct pci_dev *dev, int resno) u16 cmd; u32 new, check, mask; int reg; - enum pci_bar_type type; struct resource *res = dev->resource + resno; if (dev->is_virtfn) { @@ -66,14 +65,16 @@ static void pci_std_update_resource(struct pci_dev *dev, int resno) else mask = (u32)PCI_BASE_ADDRESS_MEM_MASK; - reg = pci_resource_bar(dev, resno, &type); - if (!reg) - return; - if (type != pci_bar_unknown) { + if (resno < PCI_ROM_RESOURCE) { + reg = PCI_BASE_ADDRESS_0 + 4 * resno; + } else if (resno == PCI_ROM_RESOURCE) { if (!(res->flags & IORESOURCE_ROM_ENABLE)) return; + + reg = dev->rom_base_reg; new |= PCI_ROM_ADDRESS_ENABLE; - } + } else + return; /* * We can't update a 64-bit BAR atomically, so when possible, From ed09d211d28e260e81b857e1933e43a3f230cf34 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 17 Mar 2017 00:48:23 +0000 Subject: [PATCH 318/357] PCI: Add comments about ROM BAR updating [ Upstream commit 0b457dde3cf8b7c76a60f8e960f21bbd4abdc416 ] pci_update_resource() updates a hardware BAR so its address matches the kernel's struct resource UNLESS it's a disabled ROM BAR. We only update those when we enable the ROM. It's not obvious from the code why ROM BARs should be handled specially. Apparently there are Matrox devices with defective ROM BARs that read as zero when disabled. That means that if pci_enable_rom() reads the disabled BAR, sets PCI_ROM_ADDRESS_ENABLE (without re-inserting the address), and writes it back, it would enable the ROM at address zero. Add comments and references to explain why we can't make the code look more rational. The code changes are from 755528c860b0 ("Ignore disabled ROM resources at setup") and 8085ce084c0f ("[PATCH] Fix PCI ROM mapping"). Link: https://lkml.org/lkml/2005/8/30/138 Signed-off-by: Bjorn Helgaas Reviewed-by: Gavin Shan Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/rom.c | 5 +++++ drivers/pci/setup-res.c | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c index 06663d391b39..b6edb187d160 100644 --- a/drivers/pci/rom.c +++ b/drivers/pci/rom.c @@ -35,6 +35,11 @@ int pci_enable_rom(struct pci_dev *pdev) if (res->flags & IORESOURCE_ROM_SHADOW) return 0; + /* + * Ideally pci_update_resource() would update the ROM BAR address, + * and we would only set the enable bit here. But apparently some + * devices have buggy ROM BARs that read as zero when disabled. + */ pcibios_resource_to_bus(pdev->bus, ®ion, res); pci_read_config_dword(pdev, pdev->rom_base_reg, &rom_addr); rom_addr &= ~PCI_ROM_ADDRESS_MASK; diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 182d0f065a42..bd6fca91c736 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -68,6 +68,12 @@ static void pci_std_update_resource(struct pci_dev *dev, int resno) if (resno < PCI_ROM_RESOURCE) { reg = PCI_BASE_ADDRESS_0 + 4 * resno; } else if (resno == PCI_ROM_RESOURCE) { + + /* + * Apparently some Matrox devices have ROM BARs that read + * as zero when disabled, so don't update ROM BARs unless + * they're enabled. See https://lkml.org/lkml/2005/8/30/138. + */ if (!(res->flags & IORESOURCE_ROM_ENABLE)) return; From bb479246bb3ee7d8f4b1caada0db9a268e958f1e Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 17 Mar 2017 00:48:23 +0000 Subject: [PATCH 319/357] PCI: Decouple IORESOURCE_ROM_ENABLE and PCI_ROM_ADDRESS_ENABLE [ Upstream commit 7a6d312b50e63f598f5b5914c4fd21878ac2b595 ] Remove the assumption that IORESOURCE_ROM_ENABLE == PCI_ROM_ADDRESS_ENABLE. PCI_ROM_ADDRESS_ENABLE is the ROM enable bit defined by the PCI spec, so if we're reading or writing a BAR register value, that's what we should use. IORESOURCE_ROM_ENABLE is a corresponding bit in struct resource flags. Signed-off-by: Bjorn Helgaas Reviewed-by: Gavin Shan Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/probe.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 300770cdc084..d266d800f246 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -227,7 +227,8 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, mask64 = (u32)PCI_BASE_ADDRESS_MEM_MASK; } } else { - res->flags |= (l & IORESOURCE_ROM_ENABLE); + if (l & PCI_ROM_ADDRESS_ENABLE) + res->flags |= IORESOURCE_ROM_ENABLE; l64 = l & PCI_ROM_ADDRESS_MASK; sz64 = sz & PCI_ROM_ADDRESS_MASK; mask64 = (u32)PCI_ROM_ADDRESS_MASK; From a38012dc69eb2fb1a660bbc88e98c882b5cf2fcb Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 17 Mar 2017 00:48:24 +0000 Subject: [PATCH 320/357] PCI: Don't update VF BARs while VF memory space is enabled [ Upstream commit 546ba9f8f22f71b0202b6ba8967be5cc6dae4e21 ] If we update a VF BAR while it's enabled, there are two potential problems: 1) Any driver that's using the VF has a cached BAR value that is stale after the update, and 2) We can't update 64-bit BARs atomically, so the intermediate state (new lower dword with old upper dword) may conflict with another device, and an access by a driver unrelated to the VF may cause a bus error. Warn about attempts to update VF BARs while they are enabled. This is a programming error, so use dev_WARN() to get a backtrace. Signed-off-by: Bjorn Helgaas Reviewed-by: Gavin Shan Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/iov.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 3fb8d2a9fb96..8255d28cf115 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -566,6 +566,7 @@ void pci_iov_update_resource(struct pci_dev *dev, int resno) struct resource *res = dev->resource + resno; int vf_bar = resno - PCI_IOV_RESOURCES; struct pci_bus_region region; + u16 cmd; u32 new; int reg; @@ -577,6 +578,13 @@ void pci_iov_update_resource(struct pci_dev *dev, int resno) if (!iov) return; + pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &cmd); + if ((cmd & PCI_SRIOV_CTRL_VFE) && (cmd & PCI_SRIOV_CTRL_MSE)) { + dev_WARN(&dev->dev, "can't update enabled VF BAR%d %pR\n", + vf_bar, res); + return; + } + /* * Ignore unimplemented BARs, unused resource slots for 64-bit * BARs, and non-movable resources, e.g., those described via From 74cce811a4b60d64c5c30496f326ecd18a72a6b2 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 17 Mar 2017 00:48:24 +0000 Subject: [PATCH 321/357] PCI: Update BARs using property bits appropriate for type [ Upstream commit 45d004f4afefdd8d79916ee6d97a9ecd94bb1ffe ] The BAR property bits (0-3 for memory BARs, 0-1 for I/O BARs) are supposed to be read-only, but we do save them in res->flags and include them when updating the BAR. Mask the I/O property bits with ~PCI_BASE_ADDRESS_IO_MASK (0x3) instead of PCI_REGION_FLAG_MASK (0xf) to make it obvious that we can't corrupt bits 2-3 of I/O addresses. Use PCI_ROM_ADDRESS_MASK for ROM BARs. This means we'll only check the top 21 bits (instead of the 28 bits we used to check) of a ROM BAR to see if the update was successful. Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/setup-res.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index bd6fca91c736..8a7322c4775d 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -58,12 +58,17 @@ static void pci_std_update_resource(struct pci_dev *dev, int resno) return; pcibios_resource_to_bus(dev->bus, ®ion, res); + new = region.start; - new = region.start | (res->flags & PCI_REGION_FLAG_MASK); - if (res->flags & IORESOURCE_IO) + if (res->flags & IORESOURCE_IO) { mask = (u32)PCI_BASE_ADDRESS_IO_MASK; - else + new |= res->flags & ~PCI_BASE_ADDRESS_IO_MASK; + } else if (resno == PCI_ROM_RESOURCE) { + mask = (u32)PCI_ROM_ADDRESS_MASK; + } else { mask = (u32)PCI_BASE_ADDRESS_MEM_MASK; + new |= res->flags & ~PCI_BASE_ADDRESS_MEM_MASK; + } if (resno < PCI_ROM_RESOURCE) { reg = PCI_BASE_ADDRESS_0 + 4 * resno; From 3d58444dea81ba0556ffcb356a19fc2d224d4eac Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 17 Mar 2017 00:48:24 +0000 Subject: [PATCH 322/357] PCI: Ignore BAR updates on virtual functions [ Upstream commit 63880b230a4af502c56dde3d4588634c70c66006 ] VF BARs are read-only zero, so updating VF BARs will not have any effect. See the SR-IOV spec r1.1, sec 3.4.1.11. We already ignore these updates because of 70675e0b6a1a ("PCI: Don't try to restore VF BARs"); this merely restructures it slightly to make it easier to split updates for standard and SR-IOV BARs. Signed-off-by: Bjorn Helgaas Reviewed-by: Gavin Shan Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci.c | 4 ---- drivers/pci/setup-res.c | 5 ++--- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 1fc2cf4b5ab6..6922964e3dff 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -564,10 +564,6 @@ static void pci_restore_bars(struct pci_dev *dev) { int i; - /* Per SR-IOV spec 3.4.1.11, VF BARs are RO zero */ - if (dev->is_virtfn) - return; - for (i = 0; i < PCI_BRIDGE_RESOURCES; i++) pci_update_resource(dev, i); } diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 8a7322c4775d..4bc589ee78d0 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -34,10 +34,9 @@ static void pci_std_update_resource(struct pci_dev *dev, int resno) int reg; struct resource *res = dev->resource + resno; - if (dev->is_virtfn) { - dev_warn(&dev->dev, "can't update VF BAR%d\n", resno); + /* Per SR-IOV spec 3.4.1.11, VF BARs are RO zero */ + if (dev->is_virtfn) return; - } /* * Ignore resources for unimplemented BARs and unused resource slots From fb7c521a1460ad46a17859274d79f30599bdb5ea Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 17 Mar 2017 00:48:25 +0000 Subject: [PATCH 323/357] PCI: Do any VF BAR updates before enabling the BARs [ Upstream commit f40ec3c748c6912f6266c56a7f7992de61b255ed ] Previously we enabled VFs and enable their memory space before calling pcibios_sriov_enable(). But pcibios_sriov_enable() may update the VF BARs: for example, on PPC PowerNV we may change them to manage the association of VFs to PEs. Because 64-bit BARs cannot be updated atomically, it's unsafe to update them while they're enabled. The half-updated state may conflict with other devices in the system. Call pcibios_sriov_enable() before enabling the VFs so any BAR updates happen while the VF BARs are disabled. [bhelgaas: changelog] Tested-by: Carol Soto Signed-off-by: Gavin Shan Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/iov.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 8255d28cf115..47227820406d 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -306,13 +306,6 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) return rc; } - pci_iov_set_numvfs(dev, nr_virtfn); - iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE; - pci_cfg_access_lock(dev); - pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); - msleep(100); - pci_cfg_access_unlock(dev); - iov->initial_VFs = initial; if (nr_virtfn < initial) initial = nr_virtfn; @@ -323,6 +316,13 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) goto err_pcibios; } + pci_iov_set_numvfs(dev, nr_virtfn); + iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE; + pci_cfg_access_lock(dev); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + msleep(100); + pci_cfg_access_unlock(dev); + for (i = 0; i < initial; i++) { rc = pci_iov_add_virtfn(dev, i, 0); if (rc) From 3e5a7f5b95f00bf69e93a3b7019355ced807c3d0 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Fri, 17 Mar 2017 00:48:25 +0000 Subject: [PATCH 324/357] ibmveth: calculate gso_segs for large packets [ Upstream commit 94acf164dc8f1184e8d0737be7125134c2701dbe ] Include calculations to compute the number of segments that comprise an aggregated large packet. Signed-off-by: Thomas Falcon Reviewed-by: Marcelo Ricardo Leitner Reviewed-by: Jonathan Maxwell Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ibm/ibmveth.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index a36022ba4e42..03dca732e4c6 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1181,7 +1181,9 @@ map_failed: static void ibmveth_rx_mss_helper(struct sk_buff *skb, u16 mss, int lrg_pkt) { + struct tcphdr *tcph; int offset = 0; + int hdr_len; /* only TCP packets will be aggregated */ if (skb->protocol == htons(ETH_P_IP)) { @@ -1208,14 +1210,20 @@ static void ibmveth_rx_mss_helper(struct sk_buff *skb, u16 mss, int lrg_pkt) /* if mss is not set through Large Packet bit/mss in rx buffer, * expect that the mss will be written to the tcp header checksum. */ + tcph = (struct tcphdr *)(skb->data + offset); if (lrg_pkt) { skb_shinfo(skb)->gso_size = mss; } else if (offset) { - struct tcphdr *tcph = (struct tcphdr *)(skb->data + offset); - skb_shinfo(skb)->gso_size = ntohs(tcph->check); tcph->check = 0; } + + if (skb_shinfo(skb)->gso_size) { + hdr_len = offset + tcph->doff * 4; + skb_shinfo(skb)->gso_segs = + DIV_ROUND_UP(skb->len - hdr_len, + skb_shinfo(skb)->gso_size); + } } static int ibmveth_poll(struct napi_struct *napi, int budget) From 3c0cbb47a8d634978225a52764691096a7fc1468 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 17 Mar 2017 00:48:26 +0000 Subject: [PATCH 325/357] Drivers: hv: ring_buffer: count on wrap around mappings in get_next_pkt_raw() (v2) [ Upstream commit fa32ff6576623616c1751562edaed8c164ca5199 ] With wrap around mappings in place we can always provide drivers with direct links to packets on the ring buffer, even when they wrap around. Do the required updates to get_next_pkt_raw()/put_pkt_raw() The first version of this commit was reverted (65a532f3d50a) to deal with cross-tree merge issues which are (hopefully) resolved now. Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Tested-by: Dexuan Cui Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 192eef2fd766..d596a076da11 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1548,31 +1548,23 @@ static inline struct vmpacket_descriptor * get_next_pkt_raw(struct vmbus_channel *channel) { struct hv_ring_buffer_info *ring_info = &channel->inbound; - u32 read_loc = ring_info->priv_read_index; + u32 priv_read_loc = ring_info->priv_read_index; void *ring_buffer = hv_get_ring_buffer(ring_info); - struct vmpacket_descriptor *cur_desc; - u32 packetlen; u32 dsize = ring_info->ring_datasize; - u32 delta = read_loc - ring_info->ring_buffer->read_index; + /* + * delta is the difference between what is available to read and + * what was already consumed in place. We commit read index after + * the whole batch is processed. + */ + u32 delta = priv_read_loc >= ring_info->ring_buffer->read_index ? + priv_read_loc - ring_info->ring_buffer->read_index : + (dsize - ring_info->ring_buffer->read_index) + priv_read_loc; u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta); if (bytes_avail_toread < sizeof(struct vmpacket_descriptor)) return NULL; - if ((read_loc + sizeof(*cur_desc)) > dsize) - return NULL; - - cur_desc = ring_buffer + read_loc; - packetlen = cur_desc->len8 << 3; - - /* - * If the packet under consideration is wrapping around, - * return failure. - */ - if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > (dsize - 1)) - return NULL; - - return cur_desc; + return ring_buffer + priv_read_loc; } /* @@ -1584,16 +1576,14 @@ static inline void put_pkt_raw(struct vmbus_channel *channel, struct vmpacket_descriptor *desc) { struct hv_ring_buffer_info *ring_info = &channel->inbound; - u32 read_loc = ring_info->priv_read_index; u32 packetlen = desc->len8 << 3; u32 dsize = ring_info->ring_datasize; - if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > dsize) - BUG(); /* * Include the packet trailer. */ ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER; + ring_info->priv_read_index %= dsize; } /* From 5d8b3e755974494296e4a2c5874c04aba9532566 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 17 Mar 2017 00:48:26 +0000 Subject: [PATCH 326/357] vfio/spapr: Postpone allocation of userspace version of TCE table [ Upstream commit 39701e56f5f16ea0cf8fc9e8472e645f8de91d23 ] The iommu_table struct manages a hardware TCE table and a vmalloc'd table with corresponding userspace addresses. Both are allocated when the default DMA window is created and this happens when the very first group is attached to a container. As we are going to allow the userspace to configure container in one memory context and pas container fd to another, we have to postpones such allocations till a container fd is passed to the destination user process so we would account locked memory limit against the actual container user constrainsts. This postpones the it_userspace array allocation till it is used first time for mapping. The unmapping patch already checks if the array is allocated. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Acked-by: Alex Williamson Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/vfio_iommu_spapr_tce.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 80378ddadc5c..2a6e13892397 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -509,6 +509,12 @@ static long tce_iommu_build_v2(struct tce_container *container, unsigned long hpa; enum dma_data_direction dirtmp; + if (!tbl->it_userspace) { + ret = tce_iommu_userspace_view_alloc(tbl); + if (ret) + return ret; + } + for (i = 0; i < pages; ++i) { struct mm_iommu_table_group_mem_t *mem = NULL; unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, @@ -582,15 +588,6 @@ static long tce_iommu_create_table(struct tce_container *container, WARN_ON(!ret && !(*ptbl)->it_ops->free); WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size)); - if (!ret && container->v2) { - ret = tce_iommu_userspace_view_alloc(*ptbl); - if (ret) - (*ptbl)->it_ops->free(*ptbl); - } - - if (ret) - decrement_locked_vm(table_size >> PAGE_SHIFT); - return ret; } @@ -1062,10 +1059,7 @@ static int tce_iommu_take_ownership(struct tce_container *container, if (!tbl || !tbl->it_map) continue; - rc = tce_iommu_userspace_view_alloc(tbl); - if (!rc) - rc = iommu_take_ownership(tbl); - + rc = iommu_take_ownership(tbl); if (rc) { for (j = 0; j < i; ++j) iommu_release_ownership( From 2ba7ef21820ec5cb2376078774e24d6f6a7386b2 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 17 Mar 2017 00:48:26 +0000 Subject: [PATCH 327/357] powerpc/iommu: Pass mm_struct to init/cleanup helpers [ Upstream commit 88f54a3581eb9deaa3bd1aade40aef266d782385 ] We are going to get rid of @current references in mmu_context_boos3s64.c and cache mm_struct in the VFIO container. Since mm_context_t does not have reference counting, we will be using mm_struct which does have the reference counter. This changes mm_iommu_init/mm_iommu_cleanup to receive mm_struct rather than mm_context_t (which is embedded into mm). This should not cause any behavioral change. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/mmu_context.h | 4 ++-- arch/powerpc/kernel/setup-common.c | 2 +- arch/powerpc/mm/mmu_context_book3s64.c | 4 ++-- arch/powerpc/mm/mmu_context_iommu.c | 9 +++++---- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 5c451140660a..424844bc2a57 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -23,8 +23,8 @@ extern bool mm_iommu_preregistered(void); extern long mm_iommu_get(unsigned long ua, unsigned long entries, struct mm_iommu_table_group_mem_t **pmem); extern long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem); -extern void mm_iommu_init(mm_context_t *ctx); -extern void mm_iommu_cleanup(mm_context_t *ctx); +extern void mm_iommu_init(struct mm_struct *mm); +extern void mm_iommu_cleanup(struct mm_struct *mm); extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua, unsigned long size); extern struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua, diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 270ee30abdcf..f516ac508ae3 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -915,7 +915,7 @@ void __init setup_arch(char **cmdline_p) init_mm.context.pte_frag = NULL; #endif #ifdef CONFIG_SPAPR_TCE_IOMMU - mm_iommu_init(&init_mm.context); + mm_iommu_init(&init_mm); #endif irqstack_early_init(); exc_lvl_early_init(); diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index b114f8b93ec9..ad8273590975 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -115,7 +115,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) mm->context.pte_frag = NULL; #endif #ifdef CONFIG_SPAPR_TCE_IOMMU - mm_iommu_init(&mm->context); + mm_iommu_init(mm); #endif return 0; } @@ -160,7 +160,7 @@ static inline void destroy_pagetable_page(struct mm_struct *mm) void destroy_context(struct mm_struct *mm) { #ifdef CONFIG_SPAPR_TCE_IOMMU - mm_iommu_cleanup(&mm->context); + mm_iommu_cleanup(mm); #endif #ifdef CONFIG_PPC_ICSWX diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index e0f1c33601dd..ad2e575fd418 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -373,16 +373,17 @@ void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem) } EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec); -void mm_iommu_init(mm_context_t *ctx) +void mm_iommu_init(struct mm_struct *mm) { - INIT_LIST_HEAD_RCU(&ctx->iommu_group_mem_list); + INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list); } -void mm_iommu_cleanup(mm_context_t *ctx) +void mm_iommu_cleanup(struct mm_struct *mm) { struct mm_iommu_table_group_mem_t *mem, *tmp; - list_for_each_entry_safe(mem, tmp, &ctx->iommu_group_mem_list, next) { + list_for_each_entry_safe(mem, tmp, &mm->context.iommu_group_mem_list, + next) { list_del_rcu(&mem->next); mm_iommu_do_free(mem); } From 5b34666bd2e70046f9880db01119c7d908e0888d Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 17 Mar 2017 00:48:27 +0000 Subject: [PATCH 328/357] powerpc/iommu: Stop using @current in mm_iommu_xxx [ Upstream commit d7baee6901b34c4895eb78efdbf13a49079d7404 ] This changes mm_iommu_xxx helpers to take mm_struct as a parameter instead of getting it from @current which in some situations may not have a valid reference to mm. This changes helpers to receive @mm and moves all references to @current to the caller, including checks for !current and !current->mm; checks in mm_iommu_preregistered() are removed as there is no caller yet. This moves the mm_iommu_adjust_locked_vm() call to the caller as it receives mm_iommu_table_group_mem_t but it needs mm. This should cause no behavioral change. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Acked-by: Alex Williamson Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/mmu_context.h | 16 +++++---- arch/powerpc/mm/mmu_context_iommu.c | 46 ++++++++++---------------- drivers/vfio/vfio_iommu_spapr_tce.c | 14 +++++--- 3 files changed, 36 insertions(+), 40 deletions(-) diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 424844bc2a57..b9e3f0aca261 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -19,16 +19,18 @@ extern void destroy_context(struct mm_struct *mm); struct mm_iommu_table_group_mem_t; extern int isolate_lru_page(struct page *page); /* from internal.h */ -extern bool mm_iommu_preregistered(void); -extern long mm_iommu_get(unsigned long ua, unsigned long entries, +extern bool mm_iommu_preregistered(struct mm_struct *mm); +extern long mm_iommu_get(struct mm_struct *mm, + unsigned long ua, unsigned long entries, struct mm_iommu_table_group_mem_t **pmem); -extern long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem); +extern long mm_iommu_put(struct mm_struct *mm, + struct mm_iommu_table_group_mem_t *mem); extern void mm_iommu_init(struct mm_struct *mm); extern void mm_iommu_cleanup(struct mm_struct *mm); -extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua, - unsigned long size); -extern struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua, - unsigned long entries); +extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm, + unsigned long ua, unsigned long size); +extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, + unsigned long ua, unsigned long entries); extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, unsigned long ua, unsigned long *hpa); extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem); diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index ad2e575fd418..4c6db09e77ad 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -56,7 +56,7 @@ static long mm_iommu_adjust_locked_vm(struct mm_struct *mm, } pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n", - current->pid, + current ? current->pid : 0, incr ? '+' : '-', npages << PAGE_SHIFT, mm->locked_vm << PAGE_SHIFT, @@ -66,12 +66,9 @@ static long mm_iommu_adjust_locked_vm(struct mm_struct *mm, return ret; } -bool mm_iommu_preregistered(void) +bool mm_iommu_preregistered(struct mm_struct *mm) { - if (!current || !current->mm) - return false; - - return !list_empty(¤t->mm->context.iommu_group_mem_list); + return !list_empty(&mm->context.iommu_group_mem_list); } EXPORT_SYMBOL_GPL(mm_iommu_preregistered); @@ -124,19 +121,16 @@ static int mm_iommu_move_page_from_cma(struct page *page) return 0; } -long mm_iommu_get(unsigned long ua, unsigned long entries, +long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, struct mm_iommu_table_group_mem_t **pmem) { struct mm_iommu_table_group_mem_t *mem; long i, j, ret = 0, locked_entries = 0; struct page *page = NULL; - if (!current || !current->mm) - return -ESRCH; /* process exited */ - mutex_lock(&mem_list_mutex); - list_for_each_entry_rcu(mem, ¤t->mm->context.iommu_group_mem_list, + list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { if ((mem->ua == ua) && (mem->entries == entries)) { ++mem->used; @@ -154,7 +148,7 @@ long mm_iommu_get(unsigned long ua, unsigned long entries, } - ret = mm_iommu_adjust_locked_vm(current->mm, entries, true); + ret = mm_iommu_adjust_locked_vm(mm, entries, true); if (ret) goto unlock_exit; @@ -215,11 +209,11 @@ populate: mem->entries = entries; *pmem = mem; - list_add_rcu(&mem->next, ¤t->mm->context.iommu_group_mem_list); + list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list); unlock_exit: if (locked_entries && ret) - mm_iommu_adjust_locked_vm(current->mm, locked_entries, false); + mm_iommu_adjust_locked_vm(mm, locked_entries, false); mutex_unlock(&mem_list_mutex); @@ -264,17 +258,13 @@ static void mm_iommu_free(struct rcu_head *head) static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem) { list_del_rcu(&mem->next); - mm_iommu_adjust_locked_vm(current->mm, mem->entries, false); call_rcu(&mem->rcu, mm_iommu_free); } -long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem) +long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem) { long ret = 0; - if (!current || !current->mm) - return -ESRCH; /* process exited */ - mutex_lock(&mem_list_mutex); if (mem->used == 0) { @@ -297,6 +287,8 @@ long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem) /* @mapped became 0 so now mappings are disabled, release the region */ mm_iommu_release(mem); + mm_iommu_adjust_locked_vm(mm, mem->entries, false); + unlock_exit: mutex_unlock(&mem_list_mutex); @@ -304,14 +296,12 @@ unlock_exit: } EXPORT_SYMBOL_GPL(mm_iommu_put); -struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua, - unsigned long size) +struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm, + unsigned long ua, unsigned long size) { struct mm_iommu_table_group_mem_t *mem, *ret = NULL; - list_for_each_entry_rcu(mem, - ¤t->mm->context.iommu_group_mem_list, - next) { + list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { if ((mem->ua <= ua) && (ua + size <= mem->ua + (mem->entries << PAGE_SHIFT))) { @@ -324,14 +314,12 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua, } EXPORT_SYMBOL_GPL(mm_iommu_lookup); -struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua, - unsigned long entries) +struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, + unsigned long ua, unsigned long entries) { struct mm_iommu_table_group_mem_t *mem, *ret = NULL; - list_for_each_entry_rcu(mem, - ¤t->mm->context.iommu_group_mem_list, - next) { + list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { if ((mem->ua == ua) && (mem->entries == entries)) { ret = mem; break; diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 2a6e13892397..4efd2b20c35c 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -107,14 +107,17 @@ static long tce_iommu_unregister_pages(struct tce_container *container, { struct mm_iommu_table_group_mem_t *mem; + if (!current || !current->mm) + return -ESRCH; /* process exited */ + if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK)) return -EINVAL; - mem = mm_iommu_find(vaddr, size >> PAGE_SHIFT); + mem = mm_iommu_find(current->mm, vaddr, size >> PAGE_SHIFT); if (!mem) return -ENOENT; - return mm_iommu_put(mem); + return mm_iommu_put(current->mm, mem); } static long tce_iommu_register_pages(struct tce_container *container, @@ -124,11 +127,14 @@ static long tce_iommu_register_pages(struct tce_container *container, struct mm_iommu_table_group_mem_t *mem = NULL; unsigned long entries = size >> PAGE_SHIFT; + if (!current || !current->mm) + return -ESRCH; /* process exited */ + if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) || ((vaddr + size) < vaddr)) return -EINVAL; - ret = mm_iommu_get(vaddr, entries, &mem); + ret = mm_iommu_get(current->mm, vaddr, entries, &mem); if (ret) return ret; @@ -375,7 +381,7 @@ static int tce_iommu_prereg_ua_to_hpa(unsigned long tce, unsigned long size, long ret = 0; struct mm_iommu_table_group_mem_t *mem; - mem = mm_iommu_lookup(tce, size); + mem = mm_iommu_lookup(current->mm, tce, size); if (!mem) return -EINVAL; From 92e44bcd71ae5ae2f6d259963caf0b134243caad Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 17 Mar 2017 00:48:27 +0000 Subject: [PATCH 329/357] vfio/spapr: Reference mm in tce_container [ Upstream commit bc82d122ae4a0e9f971f13403995898fcfa0c09e ] In some situations the userspace memory context may live longer than the userspace process itself so if we need to do proper memory context cleanup, we better have tce_container take a reference to mm_struct and use it later when the process is gone (@current or @current->mm is NULL). This references mm and stores the pointer in the container; this is done in a new helper - tce_iommu_mm_set() - when one of the following happens: - a container is enabled (IOMMU v1); - a first attempt to pre-register memory is made (IOMMU v2); - a DMA window is created (IOMMU v2). The @mm stays referenced till the container is destroyed. This replaces current->mm with container->mm everywhere except debug prints. This adds a check that current->mm is the same as the one stored in the container to prevent userspace from making changes to a memory context of other processes. DMA map/unmap ioctls() do not check for @mm as they already check for @enabled which is set after tce_iommu_mm_set() is called. This does not reference a task as multiple threads within the same mm are allowed to ioctl() to vfio and supposedly they will have same limits and capabilities and if they do not, we'll just fail with no harm made. Signed-off-by: Alexey Kardashevskiy Acked-by: Alex Williamson Reviewed-by: David Gibson Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/vfio_iommu_spapr_tce.c | 160 +++++++++++++++++----------- 1 file changed, 100 insertions(+), 60 deletions(-) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 4efd2b20c35c..4bebe813b5fd 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -31,49 +31,49 @@ static void tce_iommu_detach_group(void *iommu_data, struct iommu_group *iommu_group); -static long try_increment_locked_vm(long npages) +static long try_increment_locked_vm(struct mm_struct *mm, long npages) { long ret = 0, locked, lock_limit; - if (!current || !current->mm) - return -ESRCH; /* process exited */ + if (WARN_ON_ONCE(!mm)) + return -EPERM; if (!npages) return 0; - down_write(¤t->mm->mmap_sem); - locked = current->mm->locked_vm + npages; + down_write(&mm->mmap_sem); + locked = mm->locked_vm + npages; lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; if (locked > lock_limit && !capable(CAP_IPC_LOCK)) ret = -ENOMEM; else - current->mm->locked_vm += npages; + mm->locked_vm += npages; pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid, npages << PAGE_SHIFT, - current->mm->locked_vm << PAGE_SHIFT, + mm->locked_vm << PAGE_SHIFT, rlimit(RLIMIT_MEMLOCK), ret ? " - exceeded" : ""); - up_write(¤t->mm->mmap_sem); + up_write(&mm->mmap_sem); return ret; } -static void decrement_locked_vm(long npages) +static void decrement_locked_vm(struct mm_struct *mm, long npages) { - if (!current || !current->mm || !npages) - return; /* process exited */ + if (!mm || !npages) + return; - down_write(¤t->mm->mmap_sem); - if (WARN_ON_ONCE(npages > current->mm->locked_vm)) - npages = current->mm->locked_vm; - current->mm->locked_vm -= npages; + down_write(&mm->mmap_sem); + if (WARN_ON_ONCE(npages > mm->locked_vm)) + npages = mm->locked_vm; + mm->locked_vm -= npages; pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid, npages << PAGE_SHIFT, - current->mm->locked_vm << PAGE_SHIFT, + mm->locked_vm << PAGE_SHIFT, rlimit(RLIMIT_MEMLOCK)); - up_write(¤t->mm->mmap_sem); + up_write(&mm->mmap_sem); } /* @@ -98,26 +98,38 @@ struct tce_container { bool enabled; bool v2; unsigned long locked_pages; + struct mm_struct *mm; struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES]; struct list_head group_list; }; +static long tce_iommu_mm_set(struct tce_container *container) +{ + if (container->mm) { + if (container->mm == current->mm) + return 0; + return -EPERM; + } + BUG_ON(!current->mm); + container->mm = current->mm; + atomic_inc(&container->mm->mm_count); + + return 0; +} + static long tce_iommu_unregister_pages(struct tce_container *container, __u64 vaddr, __u64 size) { struct mm_iommu_table_group_mem_t *mem; - if (!current || !current->mm) - return -ESRCH; /* process exited */ - if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK)) return -EINVAL; - mem = mm_iommu_find(current->mm, vaddr, size >> PAGE_SHIFT); + mem = mm_iommu_find(container->mm, vaddr, size >> PAGE_SHIFT); if (!mem) return -ENOENT; - return mm_iommu_put(current->mm, mem); + return mm_iommu_put(container->mm, mem); } static long tce_iommu_register_pages(struct tce_container *container, @@ -127,14 +139,11 @@ static long tce_iommu_register_pages(struct tce_container *container, struct mm_iommu_table_group_mem_t *mem = NULL; unsigned long entries = size >> PAGE_SHIFT; - if (!current || !current->mm) - return -ESRCH; /* process exited */ - if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) || ((vaddr + size) < vaddr)) return -EINVAL; - ret = mm_iommu_get(current->mm, vaddr, entries, &mem); + ret = mm_iommu_get(container->mm, vaddr, entries, &mem); if (ret) return ret; @@ -143,7 +152,8 @@ static long tce_iommu_register_pages(struct tce_container *container, return 0; } -static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl) +static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl, + struct mm_struct *mm) { unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) * tbl->it_size, PAGE_SIZE); @@ -152,13 +162,13 @@ static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl) BUG_ON(tbl->it_userspace); - ret = try_increment_locked_vm(cb >> PAGE_SHIFT); + ret = try_increment_locked_vm(mm, cb >> PAGE_SHIFT); if (ret) return ret; uas = vzalloc(cb); if (!uas) { - decrement_locked_vm(cb >> PAGE_SHIFT); + decrement_locked_vm(mm, cb >> PAGE_SHIFT); return -ENOMEM; } tbl->it_userspace = uas; @@ -166,7 +176,8 @@ static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl) return 0; } -static void tce_iommu_userspace_view_free(struct iommu_table *tbl) +static void tce_iommu_userspace_view_free(struct iommu_table *tbl, + struct mm_struct *mm) { unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) * tbl->it_size, PAGE_SIZE); @@ -176,7 +187,7 @@ static void tce_iommu_userspace_view_free(struct iommu_table *tbl) vfree(tbl->it_userspace); tbl->it_userspace = NULL; - decrement_locked_vm(cb >> PAGE_SHIFT); + decrement_locked_vm(mm, cb >> PAGE_SHIFT); } static bool tce_page_is_contained(struct page *page, unsigned page_shift) @@ -236,9 +247,6 @@ static int tce_iommu_enable(struct tce_container *container) struct iommu_table_group *table_group; struct tce_iommu_group *tcegrp; - if (!current->mm) - return -ESRCH; /* process exited */ - if (container->enabled) return -EBUSY; @@ -283,8 +291,12 @@ static int tce_iommu_enable(struct tce_container *container) if (!table_group->tce32_size) return -EPERM; + ret = tce_iommu_mm_set(container); + if (ret) + return ret; + locked = table_group->tce32_size >> PAGE_SHIFT; - ret = try_increment_locked_vm(locked); + ret = try_increment_locked_vm(container->mm, locked); if (ret) return ret; @@ -302,10 +314,8 @@ static void tce_iommu_disable(struct tce_container *container) container->enabled = false; - if (!current->mm) - return; - - decrement_locked_vm(container->locked_pages); + BUG_ON(!container->mm); + decrement_locked_vm(container->mm, container->locked_pages); } static void *tce_iommu_open(unsigned long arg) @@ -332,7 +342,8 @@ static void *tce_iommu_open(unsigned long arg) static int tce_iommu_clear(struct tce_container *container, struct iommu_table *tbl, unsigned long entry, unsigned long pages); -static void tce_iommu_free_table(struct iommu_table *tbl); +static void tce_iommu_free_table(struct tce_container *container, + struct iommu_table *tbl); static void tce_iommu_release(void *iommu_data) { @@ -357,10 +368,12 @@ static void tce_iommu_release(void *iommu_data) continue; tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); - tce_iommu_free_table(tbl); + tce_iommu_free_table(container, tbl); } tce_iommu_disable(container); + if (container->mm) + mmdrop(container->mm); mutex_destroy(&container->lock); kfree(container); @@ -375,13 +388,14 @@ static void tce_iommu_unuse_page(struct tce_container *container, put_page(page); } -static int tce_iommu_prereg_ua_to_hpa(unsigned long tce, unsigned long size, +static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container, + unsigned long tce, unsigned long size, unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem) { long ret = 0; struct mm_iommu_table_group_mem_t *mem; - mem = mm_iommu_lookup(current->mm, tce, size); + mem = mm_iommu_lookup(container->mm, tce, size); if (!mem) return -EINVAL; @@ -394,18 +408,18 @@ static int tce_iommu_prereg_ua_to_hpa(unsigned long tce, unsigned long size, return 0; } -static void tce_iommu_unuse_page_v2(struct iommu_table *tbl, - unsigned long entry) +static void tce_iommu_unuse_page_v2(struct tce_container *container, + struct iommu_table *tbl, unsigned long entry) { struct mm_iommu_table_group_mem_t *mem = NULL; int ret; unsigned long hpa = 0; unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); - if (!pua || !current || !current->mm) + if (!pua) return; - ret = tce_iommu_prereg_ua_to_hpa(*pua, IOMMU_PAGE_SIZE(tbl), + ret = tce_iommu_prereg_ua_to_hpa(container, *pua, IOMMU_PAGE_SIZE(tbl), &hpa, &mem); if (ret) pr_debug("%s: tce %lx at #%lx was not cached, ret=%d\n", @@ -435,7 +449,7 @@ static int tce_iommu_clear(struct tce_container *container, continue; if (container->v2) { - tce_iommu_unuse_page_v2(tbl, entry); + tce_iommu_unuse_page_v2(container, tbl, entry); continue; } @@ -516,7 +530,7 @@ static long tce_iommu_build_v2(struct tce_container *container, enum dma_data_direction dirtmp; if (!tbl->it_userspace) { - ret = tce_iommu_userspace_view_alloc(tbl); + ret = tce_iommu_userspace_view_alloc(tbl, container->mm); if (ret) return ret; } @@ -526,8 +540,8 @@ static long tce_iommu_build_v2(struct tce_container *container, unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry + i); - ret = tce_iommu_prereg_ua_to_hpa(tce, IOMMU_PAGE_SIZE(tbl), - &hpa, &mem); + ret = tce_iommu_prereg_ua_to_hpa(container, + tce, IOMMU_PAGE_SIZE(tbl), &hpa, &mem); if (ret) break; @@ -548,7 +562,7 @@ static long tce_iommu_build_v2(struct tce_container *container, ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp); if (ret) { /* dirtmp cannot be DMA_NONE here */ - tce_iommu_unuse_page_v2(tbl, entry + i); + tce_iommu_unuse_page_v2(container, tbl, entry + i); pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n", __func__, entry << tbl->it_page_shift, tce, ret); @@ -556,7 +570,7 @@ static long tce_iommu_build_v2(struct tce_container *container, } if (dirtmp != DMA_NONE) - tce_iommu_unuse_page_v2(tbl, entry + i); + tce_iommu_unuse_page_v2(container, tbl, entry + i); *pua = tce; @@ -584,7 +598,7 @@ static long tce_iommu_create_table(struct tce_container *container, if (!table_size) return -EINVAL; - ret = try_increment_locked_vm(table_size >> PAGE_SHIFT); + ret = try_increment_locked_vm(container->mm, table_size >> PAGE_SHIFT); if (ret) return ret; @@ -597,13 +611,14 @@ static long tce_iommu_create_table(struct tce_container *container, return ret; } -static void tce_iommu_free_table(struct iommu_table *tbl) +static void tce_iommu_free_table(struct tce_container *container, + struct iommu_table *tbl) { unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT; - tce_iommu_userspace_view_free(tbl); + tce_iommu_userspace_view_free(tbl, container->mm); tbl->it_ops->free(tbl); - decrement_locked_vm(pages); + decrement_locked_vm(container->mm, pages); } static long tce_iommu_create_window(struct tce_container *container, @@ -666,7 +681,7 @@ unset_exit: table_group = iommu_group_get_iommudata(tcegrp->grp); table_group->ops->unset_window(table_group, num); } - tce_iommu_free_table(tbl); + tce_iommu_free_table(container, tbl); return ret; } @@ -704,7 +719,7 @@ static long tce_iommu_remove_window(struct tce_container *container, /* Free table */ tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); - tce_iommu_free_table(tbl); + tce_iommu_free_table(container, tbl); container->tables[num] = NULL; return 0; @@ -730,7 +745,17 @@ static long tce_iommu_ioctl(void *iommu_data, } return (ret < 0) ? 0 : ret; + } + /* + * Sanity check to prevent one userspace from manipulating + * another userspace mm. + */ + BUG_ON(!container); + if (container->mm && container->mm != current->mm) + return -EPERM; + + switch (cmd) { case VFIO_IOMMU_SPAPR_TCE_GET_INFO: { struct vfio_iommu_spapr_tce_info info; struct tce_iommu_group *tcegrp; @@ -891,6 +916,10 @@ static long tce_iommu_ioctl(void *iommu_data, minsz = offsetofend(struct vfio_iommu_spapr_register_memory, size); + ret = tce_iommu_mm_set(container); + if (ret) + return ret; + if (copy_from_user(¶m, (void __user *)arg, minsz)) return -EFAULT; @@ -914,6 +943,9 @@ static long tce_iommu_ioctl(void *iommu_data, if (!container->v2) break; + if (!container->mm) + return -EPERM; + minsz = offsetofend(struct vfio_iommu_spapr_register_memory, size); @@ -972,6 +1004,10 @@ static long tce_iommu_ioctl(void *iommu_data, if (!container->v2) break; + ret = tce_iommu_mm_set(container); + if (ret) + return ret; + if (!tce_groups_attached(container)) return -ENXIO; @@ -1006,6 +1042,10 @@ static long tce_iommu_ioctl(void *iommu_data, if (!container->v2) break; + ret = tce_iommu_mm_set(container); + if (ret) + return ret; + if (!tce_groups_attached(container)) return -ENXIO; @@ -1046,7 +1086,7 @@ static void tce_iommu_release_ownership(struct tce_container *container, continue; tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); - tce_iommu_userspace_view_free(tbl); + tce_iommu_userspace_view_free(tbl, container->mm); if (tbl->it_map) iommu_release_ownership(tbl); From 080eb13542a853b0ee6061f17bf124079d08e64e Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 17 Mar 2017 00:48:27 +0000 Subject: [PATCH 330/357] powerpc/mm/iommu, vfio/spapr: Put pages on VFIO container shutdown [ Upstream commit 4b6fad7097f883335b6d9627c883cb7f276d94c9 ] At the moment the userspace tool is expected to request pinning of the entire guest RAM when VFIO IOMMU SPAPR v2 driver is present. When the userspace process finishes, all the pinned pages need to be put; this is done as a part of the userspace memory context (MM) destruction which happens on the very last mmdrop(). This approach has a problem that a MM of the userspace process may live longer than the userspace process itself as kernel threads use userspace process MMs which was runnning on a CPU where the kernel thread was scheduled to. If this happened, the MM remains referenced until this exact kernel thread wakes up again and releases the very last reference to the MM, on an idle system this can take even hours. This moves preregistered regions tracking from MM to VFIO; insteads of using mm_iommu_table_group_mem_t::used, tce_container::prereg_list is added so each container releases regions which it has pre-registered. This changes the userspace interface to return EBUSY if a memory region is already registered in a container. However it should not have any practical effect as the only userspace tool available now does register memory region once per container anyway. As tce_iommu_register_pages/tce_iommu_unregister_pages are called under container->lock, this does not need additional locking. Signed-off-by: Alexey Kardashevskiy Reviewed-by: Nicholas Piggin Acked-by: Alex Williamson Reviewed-by: David Gibson Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/mmu_context_book3s64.c | 4 +- arch/powerpc/mm/mmu_context_iommu.c | 11 ----- drivers/vfio/vfio_iommu_spapr_tce.c | 61 +++++++++++++++++++++++++- 3 files changed, 61 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index ad8273590975..73bf6e14c3aa 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -156,13 +156,11 @@ static inline void destroy_pagetable_page(struct mm_struct *mm) } #endif - void destroy_context(struct mm_struct *mm) { #ifdef CONFIG_SPAPR_TCE_IOMMU - mm_iommu_cleanup(mm); + WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list)); #endif - #ifdef CONFIG_PPC_ICSWX drop_cop(mm->context.acop, mm); kfree(mm->context.cop_lockp); diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index 4c6db09e77ad..104bad029ce9 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -365,14 +365,3 @@ void mm_iommu_init(struct mm_struct *mm) { INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list); } - -void mm_iommu_cleanup(struct mm_struct *mm) -{ - struct mm_iommu_table_group_mem_t *mem, *tmp; - - list_for_each_entry_safe(mem, tmp, &mm->context.iommu_group_mem_list, - next) { - list_del_rcu(&mem->next); - mm_iommu_do_free(mem); - } -} diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 4bebe813b5fd..33e59953c8a4 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -88,6 +88,15 @@ struct tce_iommu_group { struct iommu_group *grp; }; +/* + * A container needs to remember which preregistered region it has + * referenced to do proper cleanup at the userspace process exit. + */ +struct tce_iommu_prereg { + struct list_head next; + struct mm_iommu_table_group_mem_t *mem; +}; + /* * The container descriptor supports only a single group per container. * Required by the API as the container is not supplied with the IOMMU group @@ -101,6 +110,7 @@ struct tce_container { struct mm_struct *mm; struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES]; struct list_head group_list; + struct list_head prereg_list; }; static long tce_iommu_mm_set(struct tce_container *container) @@ -117,10 +127,27 @@ static long tce_iommu_mm_set(struct tce_container *container) return 0; } +static long tce_iommu_prereg_free(struct tce_container *container, + struct tce_iommu_prereg *tcemem) +{ + long ret; + + ret = mm_iommu_put(container->mm, tcemem->mem); + if (ret) + return ret; + + list_del(&tcemem->next); + kfree(tcemem); + + return 0; +} + static long tce_iommu_unregister_pages(struct tce_container *container, __u64 vaddr, __u64 size) { struct mm_iommu_table_group_mem_t *mem; + struct tce_iommu_prereg *tcemem; + bool found = false; if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK)) return -EINVAL; @@ -129,7 +156,17 @@ static long tce_iommu_unregister_pages(struct tce_container *container, if (!mem) return -ENOENT; - return mm_iommu_put(container->mm, mem); + list_for_each_entry(tcemem, &container->prereg_list, next) { + if (tcemem->mem == mem) { + found = true; + break; + } + } + + if (!found) + return -ENOENT; + + return tce_iommu_prereg_free(container, tcemem); } static long tce_iommu_register_pages(struct tce_container *container, @@ -137,16 +174,29 @@ static long tce_iommu_register_pages(struct tce_container *container, { long ret = 0; struct mm_iommu_table_group_mem_t *mem = NULL; + struct tce_iommu_prereg *tcemem; unsigned long entries = size >> PAGE_SHIFT; if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) || ((vaddr + size) < vaddr)) return -EINVAL; + mem = mm_iommu_find(container->mm, vaddr, entries); + if (mem) { + list_for_each_entry(tcemem, &container->prereg_list, next) { + if (tcemem->mem == mem) + return -EBUSY; + } + } + ret = mm_iommu_get(container->mm, vaddr, entries, &mem); if (ret) return ret; + tcemem = kzalloc(sizeof(*tcemem), GFP_KERNEL); + tcemem->mem = mem; + list_add(&tcemem->next, &container->prereg_list); + container->enabled = true; return 0; @@ -333,6 +383,7 @@ static void *tce_iommu_open(unsigned long arg) mutex_init(&container->lock); INIT_LIST_HEAD_RCU(&container->group_list); + INIT_LIST_HEAD_RCU(&container->prereg_list); container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU; @@ -371,6 +422,14 @@ static void tce_iommu_release(void *iommu_data) tce_iommu_free_table(container, tbl); } + while (!list_empty(&container->prereg_list)) { + struct tce_iommu_prereg *tcemem; + + tcemem = list_first_entry(&container->prereg_list, + struct tce_iommu_prereg, next); + WARN_ON_ONCE(tce_iommu_prereg_free(container, tcemem)); + } + tce_iommu_disable(container); if (container->mm) mmdrop(container->mm); From 2e60baca235b05d4026207e31b6fc385c0c1c122 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 17 Mar 2017 00:48:28 +0000 Subject: [PATCH 331/357] vfio/spapr: Add a helper to create default DMA window [ Upstream commit 6f01cc692a16405235d5c34056455b182682123c ] There is already a helper to create a DMA window which does allocate a table and programs it to the IOMMU group. However tce_iommu_take_ownership_ddw() did not use it and did these 2 calls itself to simplify error path. Since we are going to delay the default window creation till the default window is accessed/removed or new window is added, we need a helper to create a default window from all these cases. This adds tce_iommu_create_default_window(). Since it relies on a VFIO container to have at least one IOMMU group (for future use), this changes tce_iommu_attach_group() to add a group to the container first and then call the new helper. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Acked-by: Alex Williamson Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/vfio_iommu_spapr_tce.c | 87 ++++++++++++++--------------- 1 file changed, 42 insertions(+), 45 deletions(-) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 33e59953c8a4..e95c3ebcec59 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -784,6 +784,29 @@ static long tce_iommu_remove_window(struct tce_container *container, return 0; } +static long tce_iommu_create_default_window(struct tce_container *container) +{ + long ret; + __u64 start_addr = 0; + struct tce_iommu_group *tcegrp; + struct iommu_table_group *table_group; + + if (!tce_groups_attached(container)) + return -ENODEV; + + tcegrp = list_first_entry(&container->group_list, + struct tce_iommu_group, next); + table_group = iommu_group_get_iommudata(tcegrp->grp); + if (!table_group) + return -ENODEV; + + ret = tce_iommu_create_window(container, IOMMU_PAGE_SHIFT_4K, + table_group->tce32_size, 1, &start_addr); + WARN_ON_ONCE(!ret && start_addr); + + return ret; +} + static long tce_iommu_ioctl(void *iommu_data, unsigned int cmd, unsigned long arg) { @@ -1199,9 +1222,6 @@ static void tce_iommu_release_ownership_ddw(struct tce_container *container, static long tce_iommu_take_ownership_ddw(struct tce_container *container, struct iommu_table_group *table_group) { - long i, ret = 0; - struct iommu_table *tbl = NULL; - if (!table_group->ops->create_table || !table_group->ops->set_window || !table_group->ops->release_ownership) { WARN_ON_ONCE(1); @@ -1210,47 +1230,7 @@ static long tce_iommu_take_ownership_ddw(struct tce_container *container, table_group->ops->take_ownership(table_group); - /* - * If it the first group attached, check if there is - * a default DMA window and create one if none as - * the userspace expects it to exist. - */ - if (!tce_groups_attached(container) && !container->tables[0]) { - ret = tce_iommu_create_table(container, - table_group, - 0, /* window number */ - IOMMU_PAGE_SHIFT_4K, - table_group->tce32_size, - 1, /* default levels */ - &tbl); - if (ret) - goto release_exit; - else - container->tables[0] = tbl; - } - - /* Set all windows to the new group */ - for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { - tbl = container->tables[i]; - - if (!tbl) - continue; - - /* Set the default window to a new group */ - ret = table_group->ops->set_window(table_group, i, tbl); - if (ret) - goto release_exit; - } - return 0; - -release_exit: - for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) - table_group->ops->unset_window(table_group, i); - - table_group->ops->release_ownership(table_group); - - return ret; } static int tce_iommu_attach_group(void *iommu_data, @@ -1260,6 +1240,7 @@ static int tce_iommu_attach_group(void *iommu_data, struct tce_container *container = iommu_data; struct iommu_table_group *table_group; struct tce_iommu_group *tcegrp = NULL; + bool create_default_window = false; mutex_lock(&container->lock); @@ -1302,14 +1283,30 @@ static int tce_iommu_attach_group(void *iommu_data, } if (!table_group->ops || !table_group->ops->take_ownership || - !table_group->ops->release_ownership) + !table_group->ops->release_ownership) { ret = tce_iommu_take_ownership(container, table_group); - else + } else { ret = tce_iommu_take_ownership_ddw(container, table_group); + if (!tce_groups_attached(container) && !container->tables[0]) + create_default_window = true; + } if (!ret) { tcegrp->grp = iommu_group; list_add(&tcegrp->next, &container->group_list); + /* + * If it the first group attached, check if there is + * a default DMA window and create one if none as + * the userspace expects it to exist. + */ + if (create_default_window) { + ret = tce_iommu_create_default_window(container); + if (ret) { + list_del(&tcegrp->next); + tce_iommu_release_ownership_ddw(container, + table_group); + } + } } unlock_exit: From 53e18968a9c0753e542cc4f6b0cc2da5db64ffd9 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 17 Mar 2017 00:48:29 +0000 Subject: [PATCH 332/357] vfio/spapr: Postpone default window creation [ Upstream commit d9c728949ddc9de5734bf3b12ea906ca8a77f2a0 ] We are going to allow the userspace to configure container in one memory context and pass container fd to another so we are postponing memory allocations accounted against the locked memory limit. One of previous patches took care of it_userspace. At the moment we create the default DMA window when the first group is attached to a container; this is done for the userspace which is not DDW-aware but familiar with the SPAPR TCE IOMMU v2 in the part of memory pre-registration - such client expects the default DMA window to exist. This postpones the default DMA window allocation till one of the folliwing happens: 1. first map/unmap request arrives; 2. new window is requested; This adds noop for the case when the userspace requested removal of the default window which has not been created yet. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Acked-by: Alex Williamson Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/vfio_iommu_spapr_tce.c | 40 ++++++++++++++++++----------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index e95c3ebcec59..c8823578a1b2 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -106,6 +106,7 @@ struct tce_container { struct mutex lock; bool enabled; bool v2; + bool def_window_pending; unsigned long locked_pages; struct mm_struct *mm; struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES]; @@ -791,6 +792,9 @@ static long tce_iommu_create_default_window(struct tce_container *container) struct tce_iommu_group *tcegrp; struct iommu_table_group *table_group; + if (!container->def_window_pending) + return 0; + if (!tce_groups_attached(container)) return -ENODEV; @@ -804,6 +808,9 @@ static long tce_iommu_create_default_window(struct tce_container *container) table_group->tce32_size, 1, &start_addr); WARN_ON_ONCE(!ret && start_addr); + if (!ret) + container->def_window_pending = false; + return ret; } @@ -907,6 +914,10 @@ static long tce_iommu_ioctl(void *iommu_data, VFIO_DMA_MAP_FLAG_WRITE)) return -EINVAL; + ret = tce_iommu_create_default_window(container); + if (ret) + return ret; + num = tce_iommu_find_table(container, param.iova, &tbl); if (num < 0) return -ENXIO; @@ -970,6 +981,10 @@ static long tce_iommu_ioctl(void *iommu_data, if (param.flags) return -EINVAL; + ret = tce_iommu_create_default_window(container); + if (ret) + return ret; + num = tce_iommu_find_table(container, param.iova, &tbl); if (num < 0) return -ENXIO; @@ -1107,6 +1122,10 @@ static long tce_iommu_ioctl(void *iommu_data, mutex_lock(&container->lock); + ret = tce_iommu_create_default_window(container); + if (ret) + return ret; + ret = tce_iommu_create_window(container, create.page_shift, create.window_size, create.levels, &create.start_addr); @@ -1143,6 +1162,11 @@ static long tce_iommu_ioctl(void *iommu_data, if (remove.flags) return -EINVAL; + if (container->def_window_pending && !remove.start_addr) { + container->def_window_pending = false; + return 0; + } + mutex_lock(&container->lock); ret = tce_iommu_remove_window(container, remove.start_addr); @@ -1240,7 +1264,6 @@ static int tce_iommu_attach_group(void *iommu_data, struct tce_container *container = iommu_data; struct iommu_table_group *table_group; struct tce_iommu_group *tcegrp = NULL; - bool create_default_window = false; mutex_lock(&container->lock); @@ -1288,25 +1311,12 @@ static int tce_iommu_attach_group(void *iommu_data, } else { ret = tce_iommu_take_ownership_ddw(container, table_group); if (!tce_groups_attached(container) && !container->tables[0]) - create_default_window = true; + container->def_window_pending = true; } if (!ret) { tcegrp->grp = iommu_group; list_add(&tcegrp->next, &container->group_list); - /* - * If it the first group attached, check if there is - * a default DMA window and create one if none as - * the userspace expects it to exist. - */ - if (create_default_window) { - ret = tce_iommu_create_default_window(container); - if (ret) { - list_del(&tcegrp->next); - tce_iommu_release_ownership_ddw(container, - table_group); - } - } } unlock_exit: From ddc23b5212b9cfa2e4f802bd95e365e63a6b2cc7 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 17 Mar 2017 00:48:29 +0000 Subject: [PATCH 333/357] drm/nouveau/disp/gp102: fix cursor/overlay immediate channel indices [ Upstream commit e50fcff15fe120ef2103a9e18af6644235c2b14d ] Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/nouveau/nvkm/engine/disp/Kbuild | 2 + .../drm/nouveau/nvkm/engine/disp/channv50.h | 2 + .../drm/nouveau/nvkm/engine/disp/cursgp102.c | 37 +++++++++++++++++++ .../drm/nouveau/nvkm/engine/disp/oimmgp102.c | 37 +++++++++++++++++++ .../drm/nouveau/nvkm/engine/disp/rootgp104.c | 4 +- 5 files changed, 80 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgp102.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgp102.c diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild index 77a52b54a31e..70f0344c508c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild @@ -95,9 +95,11 @@ nvkm-y += nvkm/engine/disp/cursg84.o nvkm-y += nvkm/engine/disp/cursgt215.o nvkm-y += nvkm/engine/disp/cursgf119.o nvkm-y += nvkm/engine/disp/cursgk104.o +nvkm-y += nvkm/engine/disp/cursgp102.o nvkm-y += nvkm/engine/disp/oimmnv50.o nvkm-y += nvkm/engine/disp/oimmg84.o nvkm-y += nvkm/engine/disp/oimmgt215.o nvkm-y += nvkm/engine/disp/oimmgf119.o nvkm-y += nvkm/engine/disp/oimmgk104.o +nvkm-y += nvkm/engine/disp/oimmgp102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h index f5f683d9fd20..04a7e582a8bc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h @@ -114,6 +114,8 @@ extern const struct nv50_disp_pioc_oclass gf119_disp_curs_oclass; extern const struct nv50_disp_pioc_oclass gk104_disp_oimm_oclass; extern const struct nv50_disp_pioc_oclass gk104_disp_curs_oclass; +extern const struct nv50_disp_pioc_oclass gp102_disp_oimm_oclass; +extern const struct nv50_disp_pioc_oclass gp102_disp_curs_oclass; int nv50_disp_curs_new(const struct nv50_disp_chan_func *, const struct nv50_disp_chan_mthd *, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgp102.c new file mode 100644 index 000000000000..e958210d8105 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgp102.c @@ -0,0 +1,37 @@ +/* + * Copyright 2016 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ben Skeggs + */ +#include "channv50.h" +#include "rootnv50.h" + +#include + +const struct nv50_disp_pioc_oclass +gp102_disp_curs_oclass = { + .base.oclass = GK104_DISP_CURSOR, + .base.minver = 0, + .base.maxver = 0, + .ctor = nv50_disp_curs_new, + .func = &gf119_disp_pioc_func, + .chid = { 13, 17 }, +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgp102.c new file mode 100644 index 000000000000..abf82365c671 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgp102.c @@ -0,0 +1,37 @@ +/* + * Copyright 2016 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ben Skeggs + */ +#include "channv50.h" +#include "rootnv50.h" + +#include + +const struct nv50_disp_pioc_oclass +gp102_disp_oimm_oclass = { + .base.oclass = GK104_DISP_OVERLAY, + .base.minver = 0, + .base.maxver = 0, + .ctor = nv50_disp_oimm_new, + .func = &gf119_disp_pioc_func, + .chid = { 9, 13 }, +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp104.c index 8443e04dc626..b053b291cd94 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp104.c @@ -36,8 +36,8 @@ gp104_disp_root = { &gp104_disp_ovly_oclass, }, .pioc = { - &gk104_disp_oimm_oclass, - &gk104_disp_curs_oclass, + &gp102_disp_oimm_oclass, + &gp102_disp_curs_oclass, }, }; From 5001756c1c70ab555d4d3ca6965547ac02410d4b Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 17 Mar 2017 00:48:29 +0000 Subject: [PATCH 334/357] drm/nouveau/disp/nv50-: split chid into chid.ctrl and chid.user [ Upstream commit 4391d7f5c79a9fe6fa11cf6c160ca7f7bdb49d2a ] GP102/GP104 make life difficult by redefining the channel indices for some registers, but not others. Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../drm/nouveau/nvkm/engine/disp/channv50.c | 23 +++++----- .../drm/nouveau/nvkm/engine/disp/channv50.h | 6 ++- .../drm/nouveau/nvkm/engine/disp/dmacgf119.c | 44 ++++++++++--------- .../drm/nouveau/nvkm/engine/disp/dmacgp104.c | 23 +++++----- .../drm/nouveau/nvkm/engine/disp/dmacnv50.c | 44 ++++++++++--------- .../drm/nouveau/nvkm/engine/disp/piocgf119.c | 28 ++++++------ .../drm/nouveau/nvkm/engine/disp/piocnv50.c | 30 +++++++------ 7 files changed, 106 insertions(+), 92 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c index dd2953bc9264..376f2c6132a5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c @@ -82,7 +82,7 @@ nv50_disp_chan_mthd(struct nv50_disp_chan *chan, int debug) if (mthd->addr) { snprintf(cname_, sizeof(cname_), "%s %d", - mthd->name, chan->chid); + mthd->name, chan->chid.user); cname = cname_; } @@ -139,7 +139,7 @@ nv50_disp_chan_uevent_ctor(struct nvkm_object *object, void *data, u32 size, if (!(ret = nvif_unvers(ret, &data, &size, args->none))) { notify->size = sizeof(struct nvif_notify_uevent_rep); notify->types = 1; - notify->index = chan->chid; + notify->index = chan->chid.user; return 0; } @@ -159,7 +159,7 @@ nv50_disp_chan_rd32(struct nvkm_object *object, u64 addr, u32 *data) struct nv50_disp_chan *chan = nv50_disp_chan(object); struct nv50_disp *disp = chan->root->disp; struct nvkm_device *device = disp->base.engine.subdev.device; - *data = nvkm_rd32(device, 0x640000 + (chan->chid * 0x1000) + addr); + *data = nvkm_rd32(device, 0x640000 + (chan->chid.user * 0x1000) + addr); return 0; } @@ -169,7 +169,7 @@ nv50_disp_chan_wr32(struct nvkm_object *object, u64 addr, u32 data) struct nv50_disp_chan *chan = nv50_disp_chan(object); struct nv50_disp *disp = chan->root->disp; struct nvkm_device *device = disp->base.engine.subdev.device; - nvkm_wr32(device, 0x640000 + (chan->chid * 0x1000) + addr, data); + nvkm_wr32(device, 0x640000 + (chan->chid.user * 0x1000) + addr, data); return 0; } @@ -196,7 +196,7 @@ nv50_disp_chan_map(struct nvkm_object *object, u64 *addr, u32 *size) struct nv50_disp *disp = chan->root->disp; struct nvkm_device *device = disp->base.engine.subdev.device; *addr = device->func->resource_addr(device, 0) + - 0x640000 + (chan->chid * 0x1000); + 0x640000 + (chan->chid.user * 0x1000); *size = 0x001000; return 0; } @@ -243,8 +243,8 @@ nv50_disp_chan_dtor(struct nvkm_object *object) { struct nv50_disp_chan *chan = nv50_disp_chan(object); struct nv50_disp *disp = chan->root->disp; - if (chan->chid >= 0) - disp->chan[chan->chid] = NULL; + if (chan->chid.user >= 0) + disp->chan[chan->chid.user] = NULL; return chan->func->dtor ? chan->func->dtor(chan) : chan; } @@ -273,14 +273,15 @@ nv50_disp_chan_ctor(const struct nv50_disp_chan_func *func, chan->func = func; chan->mthd = mthd; chan->root = root; - chan->chid = chid; + chan->chid.ctrl = chid; + chan->chid.user = chid; chan->head = head; - if (disp->chan[chan->chid]) { - chan->chid = -1; + if (disp->chan[chan->chid.user]) { + chan->chid.user = -1; return -EBUSY; } - disp->chan[chan->chid] = chan; + disp->chan[chan->chid.user] = chan; return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h index 04a7e582a8bc..7b5a2eaff174 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h @@ -7,7 +7,11 @@ struct nv50_disp_chan { const struct nv50_disp_chan_func *func; const struct nv50_disp_chan_mthd *mthd; struct nv50_disp_root *root; - int chid; + + struct { + int ctrl; + int user; + } chid; int head; struct nvkm_object object; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c index a57f7cef307a..ce7cd74fbd5d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c @@ -32,8 +32,8 @@ gf119_disp_dmac_bind(struct nv50_disp_dmac *chan, struct nvkm_object *object, u32 handle) { return nvkm_ramht_insert(chan->base.root->ramht, object, - chan->base.chid, -9, handle, - chan->base.chid << 27 | 0x00000001); + chan->base.chid.user, -9, handle, + chan->base.chid.user << 27 | 0x00000001); } void @@ -42,22 +42,23 @@ gf119_disp_dmac_fini(struct nv50_disp_dmac *chan) struct nv50_disp *disp = chan->base.root->disp; struct nvkm_subdev *subdev = &disp->base.engine.subdev; struct nvkm_device *device = subdev->device; - int chid = chan->base.chid; + int ctrl = chan->base.chid.ctrl; + int user = chan->base.chid.user; /* deactivate channel */ - nvkm_mask(device, 0x610490 + (chid * 0x0010), 0x00001010, 0x00001000); - nvkm_mask(device, 0x610490 + (chid * 0x0010), 0x00000003, 0x00000000); + nvkm_mask(device, 0x610490 + (ctrl * 0x0010), 0x00001010, 0x00001000); + nvkm_mask(device, 0x610490 + (ctrl * 0x0010), 0x00000003, 0x00000000); if (nvkm_msec(device, 2000, - if (!(nvkm_rd32(device, 0x610490 + (chid * 0x10)) & 0x001e0000)) + if (!(nvkm_rd32(device, 0x610490 + (ctrl * 0x10)) & 0x001e0000)) break; ) < 0) { - nvkm_error(subdev, "ch %d fini: %08x\n", chid, - nvkm_rd32(device, 0x610490 + (chid * 0x10))); + nvkm_error(subdev, "ch %d fini: %08x\n", user, + nvkm_rd32(device, 0x610490 + (ctrl * 0x10))); } /* disable error reporting and completion notification */ - nvkm_mask(device, 0x610090, 0x00000001 << chid, 0x00000000); - nvkm_mask(device, 0x6100a0, 0x00000001 << chid, 0x00000000); + nvkm_mask(device, 0x610090, 0x00000001 << user, 0x00000000); + nvkm_mask(device, 0x6100a0, 0x00000001 << user, 0x00000000); } static int @@ -66,26 +67,27 @@ gf119_disp_dmac_init(struct nv50_disp_dmac *chan) struct nv50_disp *disp = chan->base.root->disp; struct nvkm_subdev *subdev = &disp->base.engine.subdev; struct nvkm_device *device = subdev->device; - int chid = chan->base.chid; + int ctrl = chan->base.chid.ctrl; + int user = chan->base.chid.user; /* enable error reporting */ - nvkm_mask(device, 0x6100a0, 0x00000001 << chid, 0x00000001 << chid); + nvkm_mask(device, 0x6100a0, 0x00000001 << user, 0x00000001 << user); /* initialise channel for dma command submission */ - nvkm_wr32(device, 0x610494 + (chid * 0x0010), chan->push); - nvkm_wr32(device, 0x610498 + (chid * 0x0010), 0x00010000); - nvkm_wr32(device, 0x61049c + (chid * 0x0010), 0x00000001); - nvkm_mask(device, 0x610490 + (chid * 0x0010), 0x00000010, 0x00000010); - nvkm_wr32(device, 0x640000 + (chid * 0x1000), 0x00000000); - nvkm_wr32(device, 0x610490 + (chid * 0x0010), 0x00000013); + nvkm_wr32(device, 0x610494 + (ctrl * 0x0010), chan->push); + nvkm_wr32(device, 0x610498 + (ctrl * 0x0010), 0x00010000); + nvkm_wr32(device, 0x61049c + (ctrl * 0x0010), 0x00000001); + nvkm_mask(device, 0x610490 + (ctrl * 0x0010), 0x00000010, 0x00000010); + nvkm_wr32(device, 0x640000 + (ctrl * 0x1000), 0x00000000); + nvkm_wr32(device, 0x610490 + (ctrl * 0x0010), 0x00000013); /* wait for it to go inactive */ if (nvkm_msec(device, 2000, - if (!(nvkm_rd32(device, 0x610490 + (chid * 0x10)) & 0x80000000)) + if (!(nvkm_rd32(device, 0x610490 + (ctrl * 0x10)) & 0x80000000)) break; ) < 0) { - nvkm_error(subdev, "ch %d init: %08x\n", chid, - nvkm_rd32(device, 0x610490 + (chid * 0x10))); + nvkm_error(subdev, "ch %d init: %08x\n", user, + nvkm_rd32(device, 0x610490 + (ctrl * 0x10))); return -EBUSY; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp104.c index ad24c2c57696..d26d3b4c41a4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp104.c @@ -32,26 +32,27 @@ gp104_disp_dmac_init(struct nv50_disp_dmac *chan) struct nv50_disp *disp = chan->base.root->disp; struct nvkm_subdev *subdev = &disp->base.engine.subdev; struct nvkm_device *device = subdev->device; - int chid = chan->base.chid; + int ctrl = chan->base.chid.ctrl; + int user = chan->base.chid.user; /* enable error reporting */ - nvkm_mask(device, 0x6100a0, 0x00000001 << chid, 0x00000001 << chid); + nvkm_mask(device, 0x6100a0, 0x00000001 << user, 0x00000001 << user); /* initialise channel for dma command submission */ - nvkm_wr32(device, 0x611494 + (chid * 0x0010), chan->push); - nvkm_wr32(device, 0x611498 + (chid * 0x0010), 0x00010000); - nvkm_wr32(device, 0x61149c + (chid * 0x0010), 0x00000001); - nvkm_mask(device, 0x610490 + (chid * 0x0010), 0x00000010, 0x00000010); - nvkm_wr32(device, 0x640000 + (chid * 0x1000), 0x00000000); - nvkm_wr32(device, 0x610490 + (chid * 0x0010), 0x00000013); + nvkm_wr32(device, 0x611494 + (ctrl * 0x0010), chan->push); + nvkm_wr32(device, 0x611498 + (ctrl * 0x0010), 0x00010000); + nvkm_wr32(device, 0x61149c + (ctrl * 0x0010), 0x00000001); + nvkm_mask(device, 0x610490 + (ctrl * 0x0010), 0x00000010, 0x00000010); + nvkm_wr32(device, 0x640000 + (ctrl * 0x1000), 0x00000000); + nvkm_wr32(device, 0x610490 + (ctrl * 0x0010), 0x00000013); /* wait for it to go inactive */ if (nvkm_msec(device, 2000, - if (!(nvkm_rd32(device, 0x610490 + (chid * 0x10)) & 0x80000000)) + if (!(nvkm_rd32(device, 0x610490 + (ctrl * 0x10)) & 0x80000000)) break; ) < 0) { - nvkm_error(subdev, "ch %d init: %08x\n", chid, - nvkm_rd32(device, 0x610490 + (chid * 0x10))); + nvkm_error(subdev, "ch %d init: %08x\n", user, + nvkm_rd32(device, 0x610490 + (ctrl * 0x10))); return -EBUSY; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c index 9c6645a357b9..cfba994bef4d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c @@ -179,9 +179,9 @@ nv50_disp_dmac_bind(struct nv50_disp_dmac *chan, struct nvkm_object *object, u32 handle) { return nvkm_ramht_insert(chan->base.root->ramht, object, - chan->base.chid, -10, handle, - chan->base.chid << 28 | - chan->base.chid); + chan->base.chid.user, -10, handle, + chan->base.chid.user << 28 | + chan->base.chid.user); } static void @@ -190,21 +190,22 @@ nv50_disp_dmac_fini(struct nv50_disp_dmac *chan) struct nv50_disp *disp = chan->base.root->disp; struct nvkm_subdev *subdev = &disp->base.engine.subdev; struct nvkm_device *device = subdev->device; - int chid = chan->base.chid; + int ctrl = chan->base.chid.ctrl; + int user = chan->base.chid.user; /* deactivate channel */ - nvkm_mask(device, 0x610200 + (chid * 0x0010), 0x00001010, 0x00001000); - nvkm_mask(device, 0x610200 + (chid * 0x0010), 0x00000003, 0x00000000); + nvkm_mask(device, 0x610200 + (ctrl * 0x0010), 0x00001010, 0x00001000); + nvkm_mask(device, 0x610200 + (ctrl * 0x0010), 0x00000003, 0x00000000); if (nvkm_msec(device, 2000, - if (!(nvkm_rd32(device, 0x610200 + (chid * 0x10)) & 0x001e0000)) + if (!(nvkm_rd32(device, 0x610200 + (ctrl * 0x10)) & 0x001e0000)) break; ) < 0) { - nvkm_error(subdev, "ch %d fini timeout, %08x\n", chid, - nvkm_rd32(device, 0x610200 + (chid * 0x10))); + nvkm_error(subdev, "ch %d fini timeout, %08x\n", user, + nvkm_rd32(device, 0x610200 + (ctrl * 0x10))); } /* disable error reporting and completion notifications */ - nvkm_mask(device, 0x610028, 0x00010001 << chid, 0x00000000 << chid); + nvkm_mask(device, 0x610028, 0x00010001 << user, 0x00000000 << user); } static int @@ -213,26 +214,27 @@ nv50_disp_dmac_init(struct nv50_disp_dmac *chan) struct nv50_disp *disp = chan->base.root->disp; struct nvkm_subdev *subdev = &disp->base.engine.subdev; struct nvkm_device *device = subdev->device; - int chid = chan->base.chid; + int ctrl = chan->base.chid.ctrl; + int user = chan->base.chid.user; /* enable error reporting */ - nvkm_mask(device, 0x610028, 0x00010000 << chid, 0x00010000 << chid); + nvkm_mask(device, 0x610028, 0x00010000 << user, 0x00010000 << user); /* initialise channel for dma command submission */ - nvkm_wr32(device, 0x610204 + (chid * 0x0010), chan->push); - nvkm_wr32(device, 0x610208 + (chid * 0x0010), 0x00010000); - nvkm_wr32(device, 0x61020c + (chid * 0x0010), chid); - nvkm_mask(device, 0x610200 + (chid * 0x0010), 0x00000010, 0x00000010); - nvkm_wr32(device, 0x640000 + (chid * 0x1000), 0x00000000); - nvkm_wr32(device, 0x610200 + (chid * 0x0010), 0x00000013); + nvkm_wr32(device, 0x610204 + (ctrl * 0x0010), chan->push); + nvkm_wr32(device, 0x610208 + (ctrl * 0x0010), 0x00010000); + nvkm_wr32(device, 0x61020c + (ctrl * 0x0010), ctrl); + nvkm_mask(device, 0x610200 + (ctrl * 0x0010), 0x00000010, 0x00000010); + nvkm_wr32(device, 0x640000 + (ctrl * 0x1000), 0x00000000); + nvkm_wr32(device, 0x610200 + (ctrl * 0x0010), 0x00000013); /* wait for it to go inactive */ if (nvkm_msec(device, 2000, - if (!(nvkm_rd32(device, 0x610200 + (chid * 0x10)) & 0x80000000)) + if (!(nvkm_rd32(device, 0x610200 + (ctrl * 0x10)) & 0x80000000)) break; ) < 0) { - nvkm_error(subdev, "ch %d init timeout, %08x\n", chid, - nvkm_rd32(device, 0x610200 + (chid * 0x10))); + nvkm_error(subdev, "ch %d init timeout, %08x\n", user, + nvkm_rd32(device, 0x610200 + (ctrl * 0x10))); return -EBUSY; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocgf119.c index a625a9876e34..0abaa6431943 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocgf119.c @@ -32,20 +32,21 @@ gf119_disp_pioc_fini(struct nv50_disp_chan *chan) struct nv50_disp *disp = chan->root->disp; struct nvkm_subdev *subdev = &disp->base.engine.subdev; struct nvkm_device *device = subdev->device; - int chid = chan->chid; + int ctrl = chan->chid.ctrl; + int user = chan->chid.user; - nvkm_mask(device, 0x610490 + (chid * 0x10), 0x00000001, 0x00000000); + nvkm_mask(device, 0x610490 + (ctrl * 0x10), 0x00000001, 0x00000000); if (nvkm_msec(device, 2000, - if (!(nvkm_rd32(device, 0x610490 + (chid * 0x10)) & 0x00030000)) + if (!(nvkm_rd32(device, 0x610490 + (ctrl * 0x10)) & 0x00030000)) break; ) < 0) { - nvkm_error(subdev, "ch %d fini: %08x\n", chid, - nvkm_rd32(device, 0x610490 + (chid * 0x10))); + nvkm_error(subdev, "ch %d fini: %08x\n", user, + nvkm_rd32(device, 0x610490 + (ctrl * 0x10))); } /* disable error reporting and completion notification */ - nvkm_mask(device, 0x610090, 0x00000001 << chid, 0x00000000); - nvkm_mask(device, 0x6100a0, 0x00000001 << chid, 0x00000000); + nvkm_mask(device, 0x610090, 0x00000001 << user, 0x00000000); + nvkm_mask(device, 0x6100a0, 0x00000001 << user, 0x00000000); } static int @@ -54,20 +55,21 @@ gf119_disp_pioc_init(struct nv50_disp_chan *chan) struct nv50_disp *disp = chan->root->disp; struct nvkm_subdev *subdev = &disp->base.engine.subdev; struct nvkm_device *device = subdev->device; - int chid = chan->chid; + int ctrl = chan->chid.ctrl; + int user = chan->chid.user; /* enable error reporting */ - nvkm_mask(device, 0x6100a0, 0x00000001 << chid, 0x00000001 << chid); + nvkm_mask(device, 0x6100a0, 0x00000001 << user, 0x00000001 << user); /* activate channel */ - nvkm_wr32(device, 0x610490 + (chid * 0x10), 0x00000001); + nvkm_wr32(device, 0x610490 + (ctrl * 0x10), 0x00000001); if (nvkm_msec(device, 2000, - u32 tmp = nvkm_rd32(device, 0x610490 + (chid * 0x10)); + u32 tmp = nvkm_rd32(device, 0x610490 + (ctrl * 0x10)); if ((tmp & 0x00030000) == 0x00010000) break; ) < 0) { - nvkm_error(subdev, "ch %d init: %08x\n", chid, - nvkm_rd32(device, 0x610490 + (chid * 0x10))); + nvkm_error(subdev, "ch %d init: %08x\n", user, + nvkm_rd32(device, 0x610490 + (ctrl * 0x10))); return -EBUSY; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocnv50.c index 9d2618dacf20..0211e0e8a35f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocnv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocnv50.c @@ -32,15 +32,16 @@ nv50_disp_pioc_fini(struct nv50_disp_chan *chan) struct nv50_disp *disp = chan->root->disp; struct nvkm_subdev *subdev = &disp->base.engine.subdev; struct nvkm_device *device = subdev->device; - int chid = chan->chid; + int ctrl = chan->chid.ctrl; + int user = chan->chid.user; - nvkm_mask(device, 0x610200 + (chid * 0x10), 0x00000001, 0x00000000); + nvkm_mask(device, 0x610200 + (ctrl * 0x10), 0x00000001, 0x00000000); if (nvkm_msec(device, 2000, - if (!(nvkm_rd32(device, 0x610200 + (chid * 0x10)) & 0x00030000)) + if (!(nvkm_rd32(device, 0x610200 + (ctrl * 0x10)) & 0x00030000)) break; ) < 0) { - nvkm_error(subdev, "ch %d timeout: %08x\n", chid, - nvkm_rd32(device, 0x610200 + (chid * 0x10))); + nvkm_error(subdev, "ch %d timeout: %08x\n", user, + nvkm_rd32(device, 0x610200 + (ctrl * 0x10))); } } @@ -50,26 +51,27 @@ nv50_disp_pioc_init(struct nv50_disp_chan *chan) struct nv50_disp *disp = chan->root->disp; struct nvkm_subdev *subdev = &disp->base.engine.subdev; struct nvkm_device *device = subdev->device; - int chid = chan->chid; + int ctrl = chan->chid.ctrl; + int user = chan->chid.user; - nvkm_wr32(device, 0x610200 + (chid * 0x10), 0x00002000); + nvkm_wr32(device, 0x610200 + (ctrl * 0x10), 0x00002000); if (nvkm_msec(device, 2000, - if (!(nvkm_rd32(device, 0x610200 + (chid * 0x10)) & 0x00030000)) + if (!(nvkm_rd32(device, 0x610200 + (ctrl * 0x10)) & 0x00030000)) break; ) < 0) { - nvkm_error(subdev, "ch %d timeout0: %08x\n", chid, - nvkm_rd32(device, 0x610200 + (chid * 0x10))); + nvkm_error(subdev, "ch %d timeout0: %08x\n", user, + nvkm_rd32(device, 0x610200 + (ctrl * 0x10))); return -EBUSY; } - nvkm_wr32(device, 0x610200 + (chid * 0x10), 0x00000001); + nvkm_wr32(device, 0x610200 + (ctrl * 0x10), 0x00000001); if (nvkm_msec(device, 2000, - u32 tmp = nvkm_rd32(device, 0x610200 + (chid * 0x10)); + u32 tmp = nvkm_rd32(device, 0x610200 + (ctrl * 0x10)); if ((tmp & 0x00030000) == 0x00010000) break; ) < 0) { - nvkm_error(subdev, "ch %d timeout1: %08x\n", chid, - nvkm_rd32(device, 0x610200 + (chid * 0x10))); + nvkm_error(subdev, "ch %d timeout1: %08x\n", user, + nvkm_rd32(device, 0x610200 + (ctrl * 0x10))); return -EBUSY; } From 0042afe117c69e41511e22c15994e853a856e10f Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 17 Mar 2017 00:48:30 +0000 Subject: [PATCH 335/357] drm/nouveau/disp/nv50-: specify ctrl/user separately when constructing classes [ Upstream commit 2a32b9b1866a2ee9f01fbf2a48d99012f0120739 ] Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/nouveau/nvkm/engine/disp/channv50.c | 11 ++++++----- .../gpu/drm/nouveau/nvkm/engine/disp/channv50.h | 15 +++++++++------ .../gpu/drm/nouveau/nvkm/engine/disp/cursg84.c | 2 +- .../gpu/drm/nouveau/nvkm/engine/disp/cursgf119.c | 2 +- .../gpu/drm/nouveau/nvkm/engine/disp/cursgk104.c | 2 +- .../gpu/drm/nouveau/nvkm/engine/disp/cursgt215.c | 2 +- .../gpu/drm/nouveau/nvkm/engine/disp/cursnv50.c | 6 +++--- .../gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c | 2 +- .../gpu/drm/nouveau/nvkm/engine/disp/oimmg84.c | 2 +- .../gpu/drm/nouveau/nvkm/engine/disp/oimmgf119.c | 2 +- .../gpu/drm/nouveau/nvkm/engine/disp/oimmgk104.c | 2 +- .../gpu/drm/nouveau/nvkm/engine/disp/oimmgt215.c | 2 +- .../gpu/drm/nouveau/nvkm/engine/disp/oimmnv50.c | 6 +++--- .../gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c | 4 ++-- 14 files changed, 32 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c index 376f2c6132a5..9d90d8b4b7e6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c @@ -263,7 +263,7 @@ nv50_disp_chan = { int nv50_disp_chan_ctor(const struct nv50_disp_chan_func *func, const struct nv50_disp_chan_mthd *mthd, - struct nv50_disp_root *root, int chid, int head, + struct nv50_disp_root *root, int ctrl, int user, int head, const struct nvkm_oclass *oclass, struct nv50_disp_chan *chan) { @@ -273,8 +273,8 @@ nv50_disp_chan_ctor(const struct nv50_disp_chan_func *func, chan->func = func; chan->mthd = mthd; chan->root = root; - chan->chid.ctrl = chid; - chan->chid.user = chid; + chan->chid.ctrl = ctrl; + chan->chid.user = user; chan->head = head; if (disp->chan[chan->chid.user]) { @@ -288,7 +288,7 @@ nv50_disp_chan_ctor(const struct nv50_disp_chan_func *func, int nv50_disp_chan_new_(const struct nv50_disp_chan_func *func, const struct nv50_disp_chan_mthd *mthd, - struct nv50_disp_root *root, int chid, int head, + struct nv50_disp_root *root, int ctrl, int user, int head, const struct nvkm_oclass *oclass, struct nvkm_object **pobject) { @@ -298,5 +298,6 @@ nv50_disp_chan_new_(const struct nv50_disp_chan_func *func, return -ENOMEM; *pobject = &chan->object; - return nv50_disp_chan_ctor(func, mthd, root, chid, head, oclass, chan); + return nv50_disp_chan_ctor(func, mthd, root, ctrl, user, + head, oclass, chan); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h index 7b5a2eaff174..737b38f6fbd2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h @@ -29,11 +29,11 @@ struct nv50_disp_chan_func { int nv50_disp_chan_ctor(const struct nv50_disp_chan_func *, const struct nv50_disp_chan_mthd *, - struct nv50_disp_root *, int chid, int head, + struct nv50_disp_root *, int ctrl, int user, int head, const struct nvkm_oclass *, struct nv50_disp_chan *); int nv50_disp_chan_new_(const struct nv50_disp_chan_func *, const struct nv50_disp_chan_mthd *, - struct nv50_disp_root *, int chid, int head, + struct nv50_disp_root *, int ctrl, int user, int head, const struct nvkm_oclass *, struct nvkm_object **); extern const struct nv50_disp_chan_func nv50_disp_pioc_func; @@ -94,13 +94,16 @@ extern const struct nv50_disp_chan_mthd gk104_disp_ovly_chan_mthd; struct nv50_disp_pioc_oclass { int (*ctor)(const struct nv50_disp_chan_func *, const struct nv50_disp_chan_mthd *, - struct nv50_disp_root *, int chid, + struct nv50_disp_root *, int ctrl, int user, const struct nvkm_oclass *, void *data, u32 size, struct nvkm_object **); struct nvkm_sclass base; const struct nv50_disp_chan_func *func; const struct nv50_disp_chan_mthd *mthd; - int chid; + struct { + int ctrl; + int user; + } chid; }; extern const struct nv50_disp_pioc_oclass nv50_disp_oimm_oclass; @@ -123,12 +126,12 @@ extern const struct nv50_disp_pioc_oclass gp102_disp_curs_oclass; int nv50_disp_curs_new(const struct nv50_disp_chan_func *, const struct nv50_disp_chan_mthd *, - struct nv50_disp_root *, int chid, + struct nv50_disp_root *, int ctrl, int user, const struct nvkm_oclass *, void *data, u32 size, struct nvkm_object **); int nv50_disp_oimm_new(const struct nv50_disp_chan_func *, const struct nv50_disp_chan_mthd *, - struct nv50_disp_root *, int chid, + struct nv50_disp_root *, int ctrl, int user, const struct nvkm_oclass *, void *data, u32 size, struct nvkm_object **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursg84.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursg84.c index dd99fc7060b1..fa781b5a7e07 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursg84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursg84.c @@ -33,5 +33,5 @@ g84_disp_curs_oclass = { .base.maxver = 0, .ctor = nv50_disp_curs_new, .func = &nv50_disp_pioc_func, - .chid = 7, + .chid = { 7, 7 }, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgf119.c index 2a1574e06ad6..2be6fb052c65 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgf119.c @@ -33,5 +33,5 @@ gf119_disp_curs_oclass = { .base.maxver = 0, .ctor = nv50_disp_curs_new, .func = &gf119_disp_pioc_func, - .chid = 13, + .chid = { 13, 13 }, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgk104.c index 28e8f06c9472..2a99db4bf8f8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgk104.c @@ -33,5 +33,5 @@ gk104_disp_curs_oclass = { .base.maxver = 0, .ctor = nv50_disp_curs_new, .func = &gf119_disp_pioc_func, - .chid = 13, + .chid = { 13, 13 }, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgt215.c index d8a4b9ca139c..00a7f3564450 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgt215.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgt215.c @@ -33,5 +33,5 @@ gt215_disp_curs_oclass = { .base.maxver = 0, .ctor = nv50_disp_curs_new, .func = &nv50_disp_pioc_func, - .chid = 7, + .chid = { 7, 7 }, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursnv50.c index 8b1320499a0f..82ff82d8c1ab 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursnv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursnv50.c @@ -33,7 +33,7 @@ int nv50_disp_curs_new(const struct nv50_disp_chan_func *func, const struct nv50_disp_chan_mthd *mthd, - struct nv50_disp_root *root, int chid, + struct nv50_disp_root *root, int ctrl, int user, const struct nvkm_oclass *oclass, void *data, u32 size, struct nvkm_object **pobject) { @@ -54,7 +54,7 @@ nv50_disp_curs_new(const struct nv50_disp_chan_func *func, } else return ret; - return nv50_disp_chan_new_(func, mthd, root, chid + head, + return nv50_disp_chan_new_(func, mthd, root, ctrl + head, user + head, head, oclass, pobject); } @@ -65,5 +65,5 @@ nv50_disp_curs_oclass = { .base.maxver = 0, .ctor = nv50_disp_curs_new, .func = &nv50_disp_pioc_func, - .chid = 7, + .chid = { 7, 7 }, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c index cfba994bef4d..0a1381a84552 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c @@ -149,7 +149,7 @@ nv50_disp_dmac_new_(const struct nv50_disp_dmac_func *func, chan->func = func; ret = nv50_disp_chan_ctor(&nv50_disp_dmac_func_, mthd, root, - chid, head, oclass, &chan->base); + chid, chid, head, oclass, &chan->base); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmg84.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmg84.c index 54a4ae8d66c6..5ad5d0f5db05 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmg84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmg84.c @@ -33,5 +33,5 @@ g84_disp_oimm_oclass = { .base.maxver = 0, .ctor = nv50_disp_oimm_new, .func = &nv50_disp_pioc_func, - .chid = 5, + .chid = { 5, 5 }, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgf119.c index c658db54afc5..1f9fd3403f07 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgf119.c @@ -33,5 +33,5 @@ gf119_disp_oimm_oclass = { .base.maxver = 0, .ctor = nv50_disp_oimm_new, .func = &gf119_disp_pioc_func, - .chid = 9, + .chid = { 9, 9 }, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgk104.c index b1fde8c125d6..0c09fe85e952 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgk104.c @@ -33,5 +33,5 @@ gk104_disp_oimm_oclass = { .base.maxver = 0, .ctor = nv50_disp_oimm_new, .func = &gf119_disp_pioc_func, - .chid = 9, + .chid = { 9, 9 }, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgt215.c index f4e7eb3d1177..1281db28aebd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgt215.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgt215.c @@ -33,5 +33,5 @@ gt215_disp_oimm_oclass = { .base.maxver = 0, .ctor = nv50_disp_oimm_new, .func = &nv50_disp_pioc_func, - .chid = 5, + .chid = { 5, 5 }, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmnv50.c index 3940b9c966ec..07540f3d32dc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmnv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmnv50.c @@ -33,7 +33,7 @@ int nv50_disp_oimm_new(const struct nv50_disp_chan_func *func, const struct nv50_disp_chan_mthd *mthd, - struct nv50_disp_root *root, int chid, + struct nv50_disp_root *root, int ctrl, int user, const struct nvkm_oclass *oclass, void *data, u32 size, struct nvkm_object **pobject) { @@ -54,7 +54,7 @@ nv50_disp_oimm_new(const struct nv50_disp_chan_func *func, } else return ret; - return nv50_disp_chan_new_(func, mthd, root, chid + head, + return nv50_disp_chan_new_(func, mthd, root, ctrl + head, user + head, head, oclass, pobject); } @@ -65,5 +65,5 @@ nv50_disp_oimm_oclass = { .base.maxver = 0, .ctor = nv50_disp_oimm_new, .func = &nv50_disp_pioc_func, - .chid = 5, + .chid = { 5, 5 }, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c index 2f9cecd81d04..05c829a603d1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c @@ -207,8 +207,8 @@ nv50_disp_root_pioc_new_(const struct nvkm_oclass *oclass, { const struct nv50_disp_pioc_oclass *sclass = oclass->priv; struct nv50_disp_root *root = nv50_disp_root(oclass->parent); - return sclass->ctor(sclass->func, sclass->mthd, root, sclass->chid, - oclass, data, size, pobject); + return sclass->ctor(sclass->func, sclass->mthd, root, sclass->chid.ctrl, + sclass->chid.user, oclass, data, size, pobject); } static int From 61a153d06ef4ce17bf2a200dc6e7247dc3b56a57 Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Fri, 17 Mar 2017 00:48:30 +0000 Subject: [PATCH 336/357] block: allow WRITE_SAME commands with the SG_IO ioctl [ Upstream commit 25cdb64510644f3e854d502d69c73f21c6df88a9 ] The WRITE_SAME commands are not present in the blk_default_cmd_filter write_ok list, and thus are failed with -EPERM when the SG_IO ioctl() is executed without CAP_SYS_RAWIO capability (e.g., unprivileged users). [ sg_io() -> blk_fill_sghdr_rq() > blk_verify_command() -> -EPERM ] The problem can be reproduced with the sg_write_same command # sg_write_same --num 1 --xferlen 512 /dev/sda # # capsh --drop=cap_sys_rawio -- -c \ 'sg_write_same --num 1 --xferlen 512 /dev/sda' Write same: pass through os error: Operation not permitted # For comparison, the WRITE_VERIFY command does not observe this problem, since it is in that list: # capsh --drop=cap_sys_rawio -- -c \ 'sg_write_verify --num 1 --ilen 512 --lba 0 /dev/sda' # So, this patch adds the WRITE_SAME commands to the list, in order for the SG_IO ioctl to finish successfully: # capsh --drop=cap_sys_rawio -- -c \ 'sg_write_same --num 1 --xferlen 512 /dev/sda' # That case happens to be exercised by QEMU KVM guests with 'scsi-block' devices (qemu "-device scsi-block" [1], libvirt "" [2]), which employs the SG_IO ioctl() and runs as an unprivileged user (libvirt-qemu). In that scenario, when a filesystem (e.g., ext4) performs its zero-out calls, which are translated to write-same calls in the guest kernel, and then into SG_IO ioctls to the host kernel, SCSI I/O errors may be observed in the guest: [...] sd 0:0:0:0: [sda] tag#0 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE [...] sd 0:0:0:0: [sda] tag#0 Sense Key : Aborted Command [current] [...] sd 0:0:0:0: [sda] tag#0 Add. Sense: I/O process terminated [...] sd 0:0:0:0: [sda] tag#0 CDB: Write Same(10) 41 00 01 04 e0 78 00 00 08 00 [...] blk_update_request: I/O error, dev sda, sector 17096824 Links: [1] http://git.qemu.org/?p=qemu.git;a=commit;h=336a6915bc7089fb20fea4ba99972ad9a97c5f52 [2] https://libvirt.org/formatdomain.html#elementsDisks (see 'disk' -> 'device') Signed-off-by: Mauricio Faria de Oliveira Signed-off-by: Brahadambal Srinivasan Reported-by: Manjunatha H R Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- block/scsi_ioctl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 0774799942e0..c6fee7437be4 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -182,6 +182,9 @@ static void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter) __set_bit(WRITE_16, filter->write_ok); __set_bit(WRITE_LONG, filter->write_ok); __set_bit(WRITE_LONG_2, filter->write_ok); + __set_bit(WRITE_SAME, filter->write_ok); + __set_bit(WRITE_SAME_16, filter->write_ok); + __set_bit(WRITE_SAME_32, filter->write_ok); __set_bit(ERASE, filter->write_ok); __set_bit(GPCMD_MODE_SELECT_10, filter->write_ok); __set_bit(MODE_SELECT, filter->write_ok); From e627116c0c35d3d42fb031683606a49ce7d63cc5 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Fri, 17 Mar 2017 00:48:31 +0000 Subject: [PATCH 337/357] s390/zcrypt: Introduce CEX6 toleration [ Upstream commit b3e8652bcbfa04807e44708d4d0c8cdad39c9215 ] Signed-off-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/s390/crypto/ap_bus.c | 3 +++ drivers/s390/crypto/ap_bus.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index ed92fb09fc8e..76b802cf2f0b 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1712,6 +1712,9 @@ static void ap_scan_bus(struct work_struct *unused) ap_dev->queue_depth = queue_depth; ap_dev->raw_hwtype = device_type; ap_dev->device_type = device_type; + /* CEX6 toleration: map to CEX5 */ + if (device_type == AP_DEVICE_TYPE_CEX6) + ap_dev->device_type = AP_DEVICE_TYPE_CEX5; ap_dev->functions = device_functions; spin_lock_init(&ap_dev->lock); INIT_LIST_HEAD(&ap_dev->pendingq); diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index d7fdf5c024d7..fd66d2c450d5 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -105,6 +105,7 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr) #define AP_DEVICE_TYPE_CEX3C 9 #define AP_DEVICE_TYPE_CEX4 10 #define AP_DEVICE_TYPE_CEX5 11 +#define AP_DEVICE_TYPE_CEX6 12 /* * Known function facilities From bd2de45031b9b05738c91b87c1c360471c075bbd Mon Sep 17 00:00:00 2001 From: Henrik Ingo Date: Fri, 17 Mar 2017 00:48:31 +0000 Subject: [PATCH 338/357] uvcvideo: uvc_scan_fallback() for webcams with broken chain [ Upstream commit e950267ab802c8558f1100eafd4087fd039ad634 ] Some devices have invalid baSourceID references, causing uvc_scan_chain() to fail, but if we just take the entities we can find and put them together in the most sensible chain we can think of, turns out they do work anyway. Note: This heuristic assumes there is a single chain. At the time of writing, devices known to have such a broken chain are - Acer Integrated Camera (5986:055a) - Realtek rtl157a7 (0bda:57a7) Signed-off-by: Henrik Ingo Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/uvc/uvc_driver.c | 118 +++++++++++++++++++++++++++-- 1 file changed, 112 insertions(+), 6 deletions(-) diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index 302e284a95eb..cde43b63c3da 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -1595,6 +1595,114 @@ static const char *uvc_print_chain(struct uvc_video_chain *chain) return buffer; } +static struct uvc_video_chain *uvc_alloc_chain(struct uvc_device *dev) +{ + struct uvc_video_chain *chain; + + chain = kzalloc(sizeof(*chain), GFP_KERNEL); + if (chain == NULL) + return NULL; + + INIT_LIST_HEAD(&chain->entities); + mutex_init(&chain->ctrl_mutex); + chain->dev = dev; + v4l2_prio_init(&chain->prio); + + return chain; +} + +/* + * Fallback heuristic for devices that don't connect units and terminals in a + * valid chain. + * + * Some devices have invalid baSourceID references, causing uvc_scan_chain() + * to fail, but if we just take the entities we can find and put them together + * in the most sensible chain we can think of, turns out they do work anyway. + * Note: This heuristic assumes there is a single chain. + * + * At the time of writing, devices known to have such a broken chain are + * - Acer Integrated Camera (5986:055a) + * - Realtek rtl157a7 (0bda:57a7) + */ +static int uvc_scan_fallback(struct uvc_device *dev) +{ + struct uvc_video_chain *chain; + struct uvc_entity *iterm = NULL; + struct uvc_entity *oterm = NULL; + struct uvc_entity *entity; + struct uvc_entity *prev; + + /* + * Start by locating the input and output terminals. We only support + * devices with exactly one of each for now. + */ + list_for_each_entry(entity, &dev->entities, list) { + if (UVC_ENTITY_IS_ITERM(entity)) { + if (iterm) + return -EINVAL; + iterm = entity; + } + + if (UVC_ENTITY_IS_OTERM(entity)) { + if (oterm) + return -EINVAL; + oterm = entity; + } + } + + if (iterm == NULL || oterm == NULL) + return -EINVAL; + + /* Allocate the chain and fill it. */ + chain = uvc_alloc_chain(dev); + if (chain == NULL) + return -ENOMEM; + + if (uvc_scan_chain_entity(chain, oterm) < 0) + goto error; + + prev = oterm; + + /* + * Add all Processing and Extension Units with two pads. The order + * doesn't matter much, use reverse list traversal to connect units in + * UVC descriptor order as we build the chain from output to input. This + * leads to units appearing in the order meant by the manufacturer for + * the cameras known to require this heuristic. + */ + list_for_each_entry_reverse(entity, &dev->entities, list) { + if (entity->type != UVC_VC_PROCESSING_UNIT && + entity->type != UVC_VC_EXTENSION_UNIT) + continue; + + if (entity->num_pads != 2) + continue; + + if (uvc_scan_chain_entity(chain, entity) < 0) + goto error; + + prev->baSourceID[0] = entity->id; + prev = entity; + } + + if (uvc_scan_chain_entity(chain, iterm) < 0) + goto error; + + prev->baSourceID[0] = iterm->id; + + list_add_tail(&chain->list, &dev->chains); + + uvc_trace(UVC_TRACE_PROBE, + "Found a video chain by fallback heuristic (%s).\n", + uvc_print_chain(chain)); + + return 0; + +error: + kfree(chain); + return -EINVAL; +} + /* * Scan the device for video chains and register video devices. * @@ -1617,15 +1725,10 @@ static int uvc_scan_device(struct uvc_device *dev) if (term->chain.next || term->chain.prev) continue; - chain = kzalloc(sizeof(*chain), GFP_KERNEL); + chain = uvc_alloc_chain(dev); if (chain == NULL) return -ENOMEM; - INIT_LIST_HEAD(&chain->entities); - mutex_init(&chain->ctrl_mutex); - chain->dev = dev; - v4l2_prio_init(&chain->prio); - term->flags |= UVC_ENTITY_FLAG_DEFAULT; if (uvc_scan_chain(chain, term) < 0) { @@ -1639,6 +1742,9 @@ static int uvc_scan_device(struct uvc_device *dev) list_add_tail(&chain->list, &dev->chains); } + if (list_empty(&dev->chains)) + uvc_scan_fallback(dev); + if (list_empty(&dev->chains)) { uvc_printk(KERN_INFO, "No valid video chain found.\n"); return -1; From bc01eb939899762eede303ffbbbfcda197316234 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Fri, 17 Mar 2017 00:48:31 +0000 Subject: [PATCH 339/357] slub: move synchronize_sched out of slab_mutex on shrink [ Upstream commit 89e364db71fb5e7fc8d93228152abfa67daf35fa ] synchronize_sched() is a heavy operation and calling it per each cache owned by a memory cgroup being destroyed may take quite some time. What is worse, it's currently called under the slab_mutex, stalling all works doing cache creation/destruction. Actually, there isn't much point in calling synchronize_sched() for each cache - it's enough to call it just once - after setting cpu_partial for all caches and before shrinking them. This way, we can also move it out of the slab_mutex, which we have to hold for iterating over the slab cache list. Link: https://bugzilla.kernel.org/show_bug.cgi?id=172991 Link: http://lkml.kernel.org/r/0a10d71ecae3db00fb4421bcd3f82bcc911f4be4.1475329751.git.vdavydov.dev@gmail.com Signed-off-by: Vladimir Davydov Reported-by: Doug Smythies Acked-by: Joonsoo Kim Cc: Christoph Lameter Cc: David Rientjes Cc: Johannes Weiner Cc: Michal Hocko Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- mm/slab.c | 4 ++-- mm/slab.h | 2 +- mm/slab_common.c | 27 +++++++++++++++++++++++++-- mm/slob.c | 2 +- mm/slub.c | 19 ++----------------- 5 files changed, 31 insertions(+), 23 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index bd878f051a3b..1f82d16a0518 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2332,7 +2332,7 @@ out: return nr_freed; } -int __kmem_cache_shrink(struct kmem_cache *cachep, bool deactivate) +int __kmem_cache_shrink(struct kmem_cache *cachep) { int ret = 0; int node; @@ -2352,7 +2352,7 @@ int __kmem_cache_shrink(struct kmem_cache *cachep, bool deactivate) int __kmem_cache_shutdown(struct kmem_cache *cachep) { - return __kmem_cache_shrink(cachep, false); + return __kmem_cache_shrink(cachep); } void __kmem_cache_release(struct kmem_cache *cachep) diff --git a/mm/slab.h b/mm/slab.h index bc05fdc3edce..ceb7d70cdb76 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -146,7 +146,7 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size, int __kmem_cache_shutdown(struct kmem_cache *); void __kmem_cache_release(struct kmem_cache *); -int __kmem_cache_shrink(struct kmem_cache *, bool); +int __kmem_cache_shrink(struct kmem_cache *); void slab_kmem_cache_release(struct kmem_cache *); struct seq_file; diff --git a/mm/slab_common.c b/mm/slab_common.c index 329b03843863..5d2f24fbafc5 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -573,6 +573,29 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg) get_online_cpus(); get_online_mems(); +#ifdef CONFIG_SLUB + /* + * In case of SLUB, we need to disable empty slab caching to + * avoid pinning the offline memory cgroup by freeable kmem + * pages charged to it. SLAB doesn't need this, as it + * periodically purges unused slabs. + */ + mutex_lock(&slab_mutex); + list_for_each_entry(s, &slab_caches, list) { + c = is_root_cache(s) ? cache_from_memcg_idx(s, idx) : NULL; + if (c) { + c->cpu_partial = 0; + c->min_partial = 0; + } + } + mutex_unlock(&slab_mutex); + /* + * kmem_cache->cpu_partial is checked locklessly (see + * put_cpu_partial()). Make sure the change is visible. + */ + synchronize_sched(); +#endif + mutex_lock(&slab_mutex); list_for_each_entry(s, &slab_caches, list) { if (!is_root_cache(s)) @@ -584,7 +607,7 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg) if (!c) continue; - __kmem_cache_shrink(c, true); + __kmem_cache_shrink(c); arr->entries[idx] = NULL; } mutex_unlock(&slab_mutex); @@ -755,7 +778,7 @@ int kmem_cache_shrink(struct kmem_cache *cachep) get_online_cpus(); get_online_mems(); kasan_cache_shrink(cachep); - ret = __kmem_cache_shrink(cachep, false); + ret = __kmem_cache_shrink(cachep); put_online_mems(); put_online_cpus(); return ret; diff --git a/mm/slob.c b/mm/slob.c index 5ec158054ffe..eac04d4357ec 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -634,7 +634,7 @@ void __kmem_cache_release(struct kmem_cache *c) { } -int __kmem_cache_shrink(struct kmem_cache *d, bool deactivate) +int __kmem_cache_shrink(struct kmem_cache *d) { return 0; } diff --git a/mm/slub.c b/mm/slub.c index 7aa0e97af928..58c7526f8de2 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3887,7 +3887,7 @@ EXPORT_SYMBOL(kfree); * being allocated from last increasing the chance that the last objects * are freed in them. */ -int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate) +int __kmem_cache_shrink(struct kmem_cache *s) { int node; int i; @@ -3899,21 +3899,6 @@ int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate) unsigned long flags; int ret = 0; - if (deactivate) { - /* - * Disable empty slabs caching. Used to avoid pinning offline - * memory cgroups by kmem pages that can be freed. - */ - s->cpu_partial = 0; - s->min_partial = 0; - - /* - * s->cpu_partial is checked locklessly (see put_cpu_partial), - * so we have to make sure the change is visible. - */ - synchronize_sched(); - } - flush_all(s); for_each_kmem_cache_node(s, node, n) { INIT_LIST_HEAD(&discard); @@ -3970,7 +3955,7 @@ static int slab_mem_going_offline_callback(void *arg) mutex_lock(&slab_mutex); list_for_each_entry(s, &slab_caches, list) - __kmem_cache_shrink(s, false); + __kmem_cache_shrink(s); mutex_unlock(&slab_mutex); return 0; From 5c19e9070df8165db7ba26c13f39848d94e2ce8a Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 17 Mar 2017 00:48:32 +0000 Subject: [PATCH 340/357] ACPI / blacklist: add _REV quirks for Dell Precision 5520 and 3520 [ Upstream commit 9523b9bf6dceef6b0215e90b2348cd646597f796 ] Precision 5520 and 3520 either hang at login and during suspend or reboot. It turns out that that adding them to acpi_rev_dmi_table[] helps to work around those issues. Signed-off-by: Alex Hung [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/blacklist.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c index bdc67bad61a7..4f87fd748303 100644 --- a/drivers/acpi/blacklist.c +++ b/drivers/acpi/blacklist.c @@ -160,6 +160,22 @@ static struct dmi_system_id acpi_rev_dmi_table[] __initdata = { DMI_MATCH(DMI_PRODUCT_NAME, "XPS 13 9343"), }, }, + { + .callback = dmi_enable_rev_override, + .ident = "DELL Precision 5520", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 5520"), + }, + }, + { + .callback = dmi_enable_rev_override, + .ident = "DELL Precision 3520", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3520"), + }, + }, #endif {} }; From 56c28e7983a8c877e99de0152ce3f7f5eea98963 Mon Sep 17 00:00:00 2001 From: Michael Pobega Date: Fri, 17 Mar 2017 00:48:32 +0000 Subject: [PATCH 341/357] ACPI / blacklist: Make Dell Latitude 3350 ethernet work [ Upstream commit 708f5dcc21ae9b35f395865fc154b0105baf4de4 ] The Dell Latitude 3350's ethernet card attempts to use a reserved IRQ (18), resulting in ACPI being unable to enable the ethernet. Adding it to acpi_rev_dmi_table[] helps to work around this problem. Signed-off-by: Michael Pobega [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/blacklist.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c index 4f87fd748303..4421f7c9981c 100644 --- a/drivers/acpi/blacklist.c +++ b/drivers/acpi/blacklist.c @@ -176,6 +176,18 @@ static struct dmi_system_id acpi_rev_dmi_table[] __initdata = { DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3520"), }, }, + /* + * Resolves a quirk with the Dell Latitude 3350 that + * causes the ethernet adapter to not function. + */ + { + .callback = dmi_enable_rev_override, + .ident = "DELL Latitude 3350", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Latitude 3350"), + }, + }, #endif {} }; From 4fa1c65cf040a933ee8d2d0d1c0df83361add7ab Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Fri, 17 Mar 2017 00:48:32 +0000 Subject: [PATCH 342/357] serial: 8250_pci: Detach low-level driver during PCI error recovery [ Upstream commit f209fa03fc9d131b3108c2e4936181eabab87416 ] During a PCI error recovery, like the ones provoked by EEH in the ppc64 platform, all IO to the device must be blocked while the recovery is completed. Current 8250_pci implementation only suspends the port instead of detaching it, which doesn't prevent incoming accesses like TIOCMGET and TIOCMSET calls from reaching the device. Those end up racing with the EEH recovery, crashing it. Similar races were also observed when opening the device and when shutting it down during recovery. This patch implements a more robust IO blockage for the 8250_pci recovery by unregistering the port at the beginning of the procedure and re-adding it afterwards. Since the port is detached from the uart layer, we can be sure that no request will make through to the device during recovery. This is similar to the solution used by the JSM serial driver. I thank Peter Hurley for valuable input on this one over one year ago. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 4d09bd495a88..6e3e63675e56 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -52,6 +52,7 @@ struct serial_private { struct pci_dev *dev; unsigned int nr; struct pci_serial_quirk *quirk; + const struct pciserial_board *board; int line[0]; }; @@ -3871,6 +3872,7 @@ pciserial_init_ports(struct pci_dev *dev, const struct pciserial_board *board) } } priv->nr = i; + priv->board = board; return priv; err_deinit: @@ -3881,7 +3883,7 @@ err_out: } EXPORT_SYMBOL_GPL(pciserial_init_ports); -void pciserial_remove_ports(struct serial_private *priv) +void pciserial_detach_ports(struct serial_private *priv) { struct pci_serial_quirk *quirk; int i; @@ -3895,7 +3897,11 @@ void pciserial_remove_ports(struct serial_private *priv) quirk = find_quirk(priv->dev); if (quirk->exit) quirk->exit(priv->dev); +} +void pciserial_remove_ports(struct serial_private *priv) +{ + pciserial_detach_ports(priv); kfree(priv); } EXPORT_SYMBOL_GPL(pciserial_remove_ports); @@ -5590,7 +5596,7 @@ static pci_ers_result_t serial8250_io_error_detected(struct pci_dev *dev, return PCI_ERS_RESULT_DISCONNECT; if (priv) - pciserial_suspend_ports(priv); + pciserial_detach_ports(priv); pci_disable_device(dev); @@ -5615,9 +5621,18 @@ static pci_ers_result_t serial8250_io_slot_reset(struct pci_dev *dev) static void serial8250_io_resume(struct pci_dev *dev) { struct serial_private *priv = pci_get_drvdata(dev); + const struct pciserial_board *board; - if (priv) - pciserial_resume_ports(priv); + if (!priv) + return; + + board = priv->board; + kfree(priv); + priv = pciserial_init_ports(dev, board); + + if (!IS_ERR(priv)) { + pci_set_drvdata(dev, priv); + } } static const struct pci_error_handlers serial8250_err_handler = { From ce7aeffe7596343fce49307f374444b199b950a1 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 17 Mar 2017 00:48:33 +0000 Subject: [PATCH 343/357] usb: gadget: udc: atmel: remove memory leak [ Upstream commit 32856eea7bf75dfb99b955ada6e147f553a11366 ] Commit bbe097f092b0 ("usb: gadget: udc: atmel: fix endpoint name") introduced a memory leak when unbinding the driver. The endpoint names would not be freed. Solve that by including the name as a string in struct usba_ep so it is freed when the endpoint is. Signed-off-by: Alexandre Belloni Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/atmel_usba_udc.c | 3 ++- drivers/usb/gadget/udc/atmel_usba_udc.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c index 45bc997d0711..a95b3e75f750 100644 --- a/drivers/usb/gadget/udc/atmel_usba_udc.c +++ b/drivers/usb/gadget/udc/atmel_usba_udc.c @@ -1978,7 +1978,8 @@ static struct usba_ep * atmel_udc_of_init(struct platform_device *pdev, dev_err(&pdev->dev, "of_probe: name error(%d)\n", ret); goto err; } - ep->ep.name = kasprintf(GFP_KERNEL, "ep%d", ep->index); + sprintf(ep->name, "ep%d", ep->index); + ep->ep.name = ep->name; ep->ep_regs = udc->regs + USBA_EPT_BASE(i); ep->dma_regs = udc->regs + USBA_DMA_BASE(i); diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.h b/drivers/usb/gadget/udc/atmel_usba_udc.h index 3e1c9d589dfa..b03b2ebfc53a 100644 --- a/drivers/usb/gadget/udc/atmel_usba_udc.h +++ b/drivers/usb/gadget/udc/atmel_usba_udc.h @@ -280,6 +280,7 @@ struct usba_ep { void __iomem *ep_regs; void __iomem *dma_regs; void __iomem *fifo; + char name[8]; struct usb_ep ep; struct usba_udc *udc; From 7885195b9177042aea3da65c4789790f8e788aa6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 17 Mar 2017 00:48:33 +0000 Subject: [PATCH 344/357] powerpc/mm: Fix build break when CMA=n && SPAPR_TCE_IOMMU=y MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a05ef161cdd22faccffe06f21fc8f1e249565385 ] Currently the build breaks if CMA=n and SPAPR_TCE_IOMMU=y: arch/powerpc/mm/mmu_context_iommu.c: In function ‘mm_iommu_get’: arch/powerpc/mm/mmu_context_iommu.c:193:42: error: ‘MIGRATE_CMA’ undeclared (first use in this function) if (get_pageblock_migratetype(page) == MIGRATE_CMA) { ^~~~~~~~~~~ Fix it by using the existing is_migrate_cma_page(), which evaulates to false when CMA=n. Fixes: 2e5bbb5461f1 ("KVM: PPC: Book3S HV: Migrate pinned pages out of CMA") Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/mmu_context_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index 104bad029ce9..7de7124ac91b 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -184,7 +184,7 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, * of the CMA zone if possible. NOTE: faulting in + migration * can be expensive. Batching can be considered later */ - if (get_pageblock_migratetype(page) == MIGRATE_CMA) { + if (is_migrate_cma_page(page)) { if (mm_iommu_move_page_from_cma(page)) goto populate; if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT), From f4d40cfd61ea6e99f965576d19b76b42fd371365 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 22 Nov 2016 12:45:28 -0800 Subject: [PATCH 345/357] clk: bcm2835: Fix ->fixed_divider of pllh_aux commit f2a46926aba1f0c33944901d2420a6a887455ddc upstream. There is no fixed divider on pllh_aux. Signed-off-by: Boris Brezillon Signed-off-by: Eric Anholt Reviewed-by: Eric Anholt Signed-off-by: Stephen Boyd Cc: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/clk/bcm/clk-bcm2835.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c index 3bbd2a58db47..2acaa77ad482 100644 --- a/drivers/clk/bcm/clk-bcm2835.c +++ b/drivers/clk/bcm/clk-bcm2835.c @@ -1598,7 +1598,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = { .a2w_reg = A2W_PLLH_AUX, .load_mask = CM_PLLH_LOADAUX, .hold_mask = 0, - .fixed_divider = 10), + .fixed_divider = 1), [BCM2835_PLLH_PIX] = REGISTER_PLL_DIV( .name = "pllh_pix", .source_pll = "pllh", From 6b3306706733cc98bf14e30ff54fa9084b76cd55 Mon Sep 17 00:00:00 2001 From: Derek Foreman Date: Thu, 24 Nov 2016 12:11:55 -0600 Subject: [PATCH 346/357] drm/vc4: Fix race between page flip completion event and clean-up commit 26fc78f6fef39b9d7a15def5e7e9826ff68303f4 upstream. There was a small window where a userspace program could submit a pageflip after receiving a pageflip completion event yet still receive EBUSY. Signed-off-by: Derek Foreman Signed-off-by: Eric Anholt Reviewed-by: Eric Anholt Reviewed-by: Daniel Stone Cc: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vc4/vc4_crtc.c | 8 ++++++++ drivers/gpu/drm/vc4/vc4_drv.h | 1 + drivers/gpu/drm/vc4/vc4_kms.c | 33 +++++++++++++++++++++++++-------- 3 files changed, 34 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index d544ff9b0d46..a062228b6831 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -669,6 +669,14 @@ void vc4_disable_vblank(struct drm_device *dev, unsigned int crtc_id) CRTC_WRITE(PV_INTEN, 0); } +/* Must be called with the event lock held */ +bool vc4_event_pending(struct drm_crtc *crtc) +{ + struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); + + return !!vc4_crtc->event; +} + static void vc4_crtc_handle_page_flip(struct vc4_crtc *vc4_crtc) { struct drm_crtc *crtc = &vc4_crtc->base; diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 7c1e4d97486f..cc77b8b5ce03 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -440,6 +440,7 @@ int vc4_bo_stats_debugfs(struct seq_file *m, void *arg); extern struct platform_driver vc4_crtc_driver; int vc4_enable_vblank(struct drm_device *dev, unsigned int crtc_id); void vc4_disable_vblank(struct drm_device *dev, unsigned int crtc_id); +bool vc4_event_pending(struct drm_crtc *crtc); int vc4_crtc_debugfs_regs(struct seq_file *m, void *arg); int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, unsigned int flags, int *vpos, int *hpos, diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index c1f65c6c8e60..67af2af70af0 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -119,17 +119,34 @@ static int vc4_atomic_commit(struct drm_device *dev, /* Make sure that any outstanding modesets have finished. */ if (nonblock) { - ret = down_trylock(&vc4->async_modeset); - if (ret) { + struct drm_crtc *crtc; + struct drm_crtc_state *crtc_state; + unsigned long flags; + bool busy = false; + + /* + * If there's an undispatched event to send then we're + * obviously still busy. If there isn't, then we can + * unconditionally wait for the semaphore because it + * shouldn't be contended (for long). + * + * This is to prevent a race where queuing a new flip + * from userspace immediately on receipt of an event + * beats our clean-up and returns EBUSY. + */ + spin_lock_irqsave(&dev->event_lock, flags); + for_each_crtc_in_state(state, crtc, crtc_state, i) + busy |= vc4_event_pending(crtc); + spin_unlock_irqrestore(&dev->event_lock, flags); + if (busy) { kfree(c); return -EBUSY; } - } else { - ret = down_interruptible(&vc4->async_modeset); - if (ret) { - kfree(c); - return ret; - } + } + ret = down_interruptible(&vc4->async_modeset); + if (ret) { + kfree(c); + return ret; } ret = drm_atomic_helper_prepare_planes(dev, state); From 8ca7ef0d9af9644a65e3e22ccc1937f00caa777b Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 2 Dec 2016 14:48:07 +0100 Subject: [PATCH 347/357] drm/vc4: Fix ->clock_select setting for the VEC encoder commit ab8df60e3a3b68420d0d4477c5f07c00fbfb078b upstream. PV_CONTROL_CLK_SELECT_VEC is actually 2 and not 0. Fix the definition and rework the vc4_set_crtc_possible_masks() to cover the full range of the PV_CONTROL_CLK_SELECT field. Signed-off-by: Boris Brezillon Signed-off-by: Eric Anholt Cc: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vc4/vc4_crtc.c | 36 +++++++++++++++++++++------------- drivers/gpu/drm/vc4/vc4_drv.h | 1 + drivers/gpu/drm/vc4/vc4_regs.h | 3 ++- 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index a062228b6831..7aadce1f7e7a 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -83,8 +83,7 @@ struct vc4_crtc_data { /* Which channel of the HVS this pixelvalve sources from. */ int hvs_channel; - enum vc4_encoder_type encoder0_type; - enum vc4_encoder_type encoder1_type; + enum vc4_encoder_type encoder_types[4]; }; #define CRTC_WRITE(offset, val) writel(val, vc4_crtc->regs + (offset)) @@ -867,20 +866,26 @@ static const struct drm_crtc_helper_funcs vc4_crtc_helper_funcs = { static const struct vc4_crtc_data pv0_data = { .hvs_channel = 0, - .encoder0_type = VC4_ENCODER_TYPE_DSI0, - .encoder1_type = VC4_ENCODER_TYPE_DPI, + .encoder_types = { + [PV_CONTROL_CLK_SELECT_DSI] = VC4_ENCODER_TYPE_DSI0, + [PV_CONTROL_CLK_SELECT_DPI_SMI_HDMI] = VC4_ENCODER_TYPE_DPI, + }, }; static const struct vc4_crtc_data pv1_data = { .hvs_channel = 2, - .encoder0_type = VC4_ENCODER_TYPE_DSI1, - .encoder1_type = VC4_ENCODER_TYPE_SMI, + .encoder_types = { + [PV_CONTROL_CLK_SELECT_DSI] = VC4_ENCODER_TYPE_DSI1, + [PV_CONTROL_CLK_SELECT_DPI_SMI_HDMI] = VC4_ENCODER_TYPE_SMI, + }, }; static const struct vc4_crtc_data pv2_data = { .hvs_channel = 1, - .encoder0_type = VC4_ENCODER_TYPE_VEC, - .encoder1_type = VC4_ENCODER_TYPE_HDMI, + .encoder_types = { + [PV_CONTROL_CLK_SELECT_DPI_SMI_HDMI] = VC4_ENCODER_TYPE_HDMI, + [PV_CONTROL_CLK_SELECT_VEC] = VC4_ENCODER_TYPE_VEC, + }, }; static const struct of_device_id vc4_crtc_dt_match[] = { @@ -894,17 +899,20 @@ static void vc4_set_crtc_possible_masks(struct drm_device *drm, struct drm_crtc *crtc) { struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); + const struct vc4_crtc_data *crtc_data = vc4_crtc->data; + const enum vc4_encoder_type *encoder_types = crtc_data->encoder_types; struct drm_encoder *encoder; drm_for_each_encoder(encoder, drm) { struct vc4_encoder *vc4_encoder = to_vc4_encoder(encoder); + int i; - if (vc4_encoder->type == vc4_crtc->data->encoder0_type) { - vc4_encoder->clock_select = 0; - encoder->possible_crtcs |= drm_crtc_mask(crtc); - } else if (vc4_encoder->type == vc4_crtc->data->encoder1_type) { - vc4_encoder->clock_select = 1; - encoder->possible_crtcs |= drm_crtc_mask(crtc); + for (i = 0; i < ARRAY_SIZE(crtc_data->encoder_types); i++) { + if (vc4_encoder->type == encoder_types[i]) { + vc4_encoder->clock_select = i; + encoder->possible_crtcs |= drm_crtc_mask(crtc); + break; + } } } } diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index cc77b8b5ce03..50a55ef999d6 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -194,6 +194,7 @@ to_vc4_plane(struct drm_plane *plane) } enum vc4_encoder_type { + VC4_ENCODER_TYPE_NONE, VC4_ENCODER_TYPE_HDMI, VC4_ENCODER_TYPE_VEC, VC4_ENCODER_TYPE_DSI0, diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index 1aa44c2db556..39f6886b2410 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -177,8 +177,9 @@ # define PV_CONTROL_WAIT_HSTART BIT(12) # define PV_CONTROL_PIXEL_REP_MASK VC4_MASK(5, 4) # define PV_CONTROL_PIXEL_REP_SHIFT 4 -# define PV_CONTROL_CLK_SELECT_DSI_VEC 0 +# define PV_CONTROL_CLK_SELECT_DSI 0 # define PV_CONTROL_CLK_SELECT_DPI_SMI_HDMI 1 +# define PV_CONTROL_CLK_SELECT_VEC 2 # define PV_CONTROL_CLK_SELECT_MASK VC4_MASK(3, 2) # define PV_CONTROL_CLK_SELECT_SHIFT 2 # define PV_CONTROL_FIFO_CLR BIT(1) From 61e79860b4bc6259a9685a107a6b34352d6dc7bd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 17 Feb 2017 14:32:18 +0000 Subject: [PATCH 348/357] arm64: KVM: VHE: Clear HCR_TGE when invalidating guest TLBs commit 68925176296a8b995e503349200e256674bfe5ac upstream. When invalidating guest TLBs, special care must be taken to actually shoot the guest TLBs and not the host ones if we're running on a VHE system. This is controlled by the HCR_EL2.TGE bit, which we forget to clear before invalidating TLBs. Address the issue by introducing two wrappers (__tlb_switch_to_guest and __tlb_switch_to_host) that take care of both the VTTBR_EL2 and HCR_EL2.TGE switching. Reported-by: Tomasz Nowicki Tested-by: Tomasz Nowicki Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/hyp/tlb.c | 64 ++++++++++++++++++++++++++++++++++------ 1 file changed, 55 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index 88e2f2b938f0..55889d057757 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -17,14 +17,62 @@ #include +static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm) +{ + u64 val; + + /* + * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and + * most TLB operations target EL2/EL0. In order to affect the + * guest TLBs (EL1/EL0), we need to change one of these two + * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so + * let's flip TGE before executing the TLB operation. + */ + write_sysreg(kvm->arch.vttbr, vttbr_el2); + val = read_sysreg(hcr_el2); + val &= ~HCR_TGE; + write_sysreg(val, hcr_el2); + isb(); +} + +static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm) +{ + write_sysreg(kvm->arch.vttbr, vttbr_el2); + isb(); +} + +static hyp_alternate_select(__tlb_switch_to_guest, + __tlb_switch_to_guest_nvhe, + __tlb_switch_to_guest_vhe, + ARM64_HAS_VIRT_HOST_EXTN); + +static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm) +{ + /* + * We're done with the TLB operation, let's restore the host's + * view of HCR_EL2. + */ + write_sysreg(0, vttbr_el2); + write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); +} + +static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm) +{ + write_sysreg(0, vttbr_el2); +} + +static hyp_alternate_select(__tlb_switch_to_host, + __tlb_switch_to_host_nvhe, + __tlb_switch_to_host_vhe, + ARM64_HAS_VIRT_HOST_EXTN); + void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { dsb(ishst); /* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - write_sysreg(kvm->arch.vttbr, vttbr_el2); - isb(); + __tlb_switch_to_guest()(kvm); /* * We could do so much better if we had the VA as well. @@ -45,7 +93,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) dsb(ish); isb(); - write_sysreg(0, vttbr_el2); + __tlb_switch_to_host()(kvm); } void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) @@ -54,14 +102,13 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) /* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - write_sysreg(kvm->arch.vttbr, vttbr_el2); - isb(); + __tlb_switch_to_guest()(kvm); asm volatile("tlbi vmalls12e1is" : : ); dsb(ish); isb(); - write_sysreg(0, vttbr_el2); + __tlb_switch_to_host()(kvm); } void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) @@ -69,14 +116,13 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); /* Switch to requested VMID */ - write_sysreg(kvm->arch.vttbr, vttbr_el2); - isb(); + __tlb_switch_to_guest()(kvm); asm volatile("tlbi vmalle1" : : ); dsb(nsh); isb(); - write_sysreg(0, vttbr_el2); + __tlb_switch_to_host()(kvm); } void __hyp_text __kvm_flush_vm_context(void) From 095635be809ade2429c844d09e8391330fa9bca4 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Tue, 7 Mar 2017 08:20:38 -0600 Subject: [PATCH 349/357] irqchip/gicv3-its: Add workaround for QDF2400 ITS erratum 0065 commit 90922a2d03d84de36bf8a9979d62580102f31a92 upstream. On Qualcomm Datacenter Technologies QDF2400 SoCs, the ITS hardware implementation uses 16Bytes for Interrupt Translation Entry (ITE), but reports an incorrect value of 8Bytes in GITS_TYPER.ITTE_size. It might cause kernel memory corruption depending on the number of MSI(x) that are configured and the amount of memory that has been allocated for ITEs in its_create_device(). This patch fixes the potential memory corruption by setting the correct ITE size to 16Bytes. Cc: stable@vger.kernel.org Signed-off-by: Shanker Donthineni Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- Documentation/arm64/silicon-errata.txt | 44 ++++++++++++++------------ arch/arm64/Kconfig | 10 ++++++ drivers/irqchip/irq-gic-v3-its.c | 16 ++++++++++ 3 files changed, 49 insertions(+), 21 deletions(-) diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index 405da11fc3e4..d11af52427b4 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -42,24 +42,26 @@ file acts as a registry of software workarounds in the Linux Kernel and will be updated when new workarounds are committed and backported to stable kernels. -| Implementor | Component | Erratum ID | Kconfig | -+----------------+-----------------+-----------------+-------------------------+ -| ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 | -| ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 | -| ARM | Cortex-A53 | #824069 | ARM64_ERRATUM_824069 | -| ARM | Cortex-A53 | #819472 | ARM64_ERRATUM_819472 | -| ARM | Cortex-A53 | #845719 | ARM64_ERRATUM_845719 | -| ARM | Cortex-A53 | #843419 | ARM64_ERRATUM_843419 | -| ARM | Cortex-A57 | #832075 | ARM64_ERRATUM_832075 | -| ARM | Cortex-A57 | #852523 | N/A | -| ARM | Cortex-A57 | #834220 | ARM64_ERRATUM_834220 | -| ARM | Cortex-A72 | #853709 | N/A | -| ARM | MMU-500 | #841119,#826419 | N/A | -| | | | | -| Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 | -| Cavium | ThunderX ITS | #23144 | CAVIUM_ERRATUM_23144 | -| Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 | -| Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 | -| Cavium | ThunderX SMMUv2 | #27704 | N/A | -| | | | | -| Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 | +| Implementor | Component | Erratum ID | Kconfig | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 | +| ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 | +| ARM | Cortex-A53 | #824069 | ARM64_ERRATUM_824069 | +| ARM | Cortex-A53 | #819472 | ARM64_ERRATUM_819472 | +| ARM | Cortex-A53 | #845719 | ARM64_ERRATUM_845719 | +| ARM | Cortex-A53 | #843419 | ARM64_ERRATUM_843419 | +| ARM | Cortex-A57 | #832075 | ARM64_ERRATUM_832075 | +| ARM | Cortex-A57 | #852523 | N/A | +| ARM | Cortex-A57 | #834220 | ARM64_ERRATUM_834220 | +| ARM | Cortex-A72 | #853709 | N/A | +| ARM | MMU-500 | #841119,#826419 | N/A | +| | | | | +| Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 | +| Cavium | ThunderX ITS | #23144 | CAVIUM_ERRATUM_23144 | +| Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 | +| Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 | +| Cavium | ThunderX SMMUv2 | #27704 | N/A | +| | | | | +| Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 | +| | | | | +| Qualcomm Tech. | QDF2400 ITS | E0065 | QCOM_QDF2400_ERRATUM_0065 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 969ef880d234..cf57a7799a0f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -474,6 +474,16 @@ config CAVIUM_ERRATUM_27456 If unsure, say Y. +config QCOM_QDF2400_ERRATUM_0065 + bool "QDF2400 E0065: Incorrect GITS_TYPER.ITT_Entry_size" + default y + help + On Qualcomm Datacenter Technologies QDF2400 SoC, ITS hardware reports + ITE size incorrectly. The GITS_TYPER.ITT_Entry_size field should have + been indicated as 16Bytes (0xf), not 8Bytes (0x7). + + If unsure, say Y. + endmenu diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index c5dee300e8a3..acb9d250a905 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1598,6 +1598,14 @@ static void __maybe_unused its_enable_quirk_cavium_23144(void *data) its->flags |= ITS_FLAGS_WORKAROUND_CAVIUM_23144; } +static void __maybe_unused its_enable_quirk_qdf2400_e0065(void *data) +{ + struct its_node *its = data; + + /* On QDF2400, the size of the ITE is 16Bytes */ + its->ite_size = 16; +} + static const struct gic_quirk its_quirks[] = { #ifdef CONFIG_CAVIUM_ERRATUM_22375 { @@ -1614,6 +1622,14 @@ static const struct gic_quirk its_quirks[] = { .mask = 0xffff0fff, .init = its_enable_quirk_cavium_23144, }, +#endif +#ifdef CONFIG_QCOM_QDF2400_ERRATUM_0065 + { + .desc = "ITS: QDF2400 erratum 0065", + .iidr = 0x00001070, /* QDF2400 ITS rev 1.x */ + .mask = 0xffffffff, + .init = its_enable_quirk_qdf2400_e0065, + }, #endif { } From 5ec98e6ffdce5eb5dbe3593df2aa824d04d70842 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 13 Mar 2017 15:57:12 +0100 Subject: [PATCH 350/357] x86/tsc: Fix ART for TSC_KNOWN_FREQ commit 44fee88cea43d3c2cac962e0439cb10a3cabff6d upstream. Subhransu reported that convert_art_to_tsc() isn't working for him. The ART to TSC relation is only set up for systems which use the refined TSC calibration. Systems with known TSC frequency (available via CPUID 15) are not using the refined calibration and therefor the ART to TSC relation is never established. Add the setup to the known frequency init path which skips ART calibration. The init code needs to be duplicated as for systems which use refined calibration the ART setup must be delayed until calibration has been done. The problem has been there since the ART support was introdduced, but only detected now because Subhransu tested the first time on hardware which has TSC frequency enumerated via CPUID 15. Note for stable: The conditional has changed from TSC_RELIABLE to TSC_KNOWN_FREQUENCY. [ tglx: Rewrote changelog and identified the proper 'Fixes' commit ] Fixes: f9677e0f8308 ("x86/tsc: Always Running Timer (ART) correlated clocksource") Reported-by: "Prusty, Subhransu S" Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Cc: christopher.s.hall@intel.com Cc: kevin.b.stanton@intel.com Cc: john.stultz@linaro.org Cc: akataria@vmware.com Link: http://lkml.kernel.org/r/20170313145712.GI3312@twins.programming.kicks-ass.net Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/tsc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 46b2f41f8b05..eea88fe5d969 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1287,6 +1287,8 @@ static int __init init_tsc_clocksource(void) * exporting a reliable TSC. */ if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { + if (boot_cpu_has(X86_FEATURE_ART)) + art_related_clocksource = &clocksource_tsc; clocksource_register_khz(&clocksource_tsc, tsc_khz); return 0; } From 24ba2842a49da3338581b42a5618088d2a0b0730 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Mon, 13 Mar 2017 19:33:37 +0300 Subject: [PATCH 351/357] x86/kasan: Fix boot with KASAN=y and PROFILE_ANNOTATED_BRANCHES=y commit be3606ff739d1c1be36389f8737c577ad87e1f57 upstream. The kernel doesn't boot with both PROFILE_ANNOTATED_BRANCHES=y and KASAN=y options selected. With branch profiling enabled we end up calling ftrace_likely_update() before kasan_early_init(). ftrace_likely_update() is built with KASAN instrumentation, so calling it before kasan has been initialized leads to crash. Use DISABLE_BRANCH_PROFILING define to make sure that we don't call ftrace_likely_update() from early code before kasan_early_init(). Fixes: ef7f0d6a6ca8 ("x86_64: add KASan support") Reported-by: Fengguang Wu Signed-off-by: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Alexander Potapenko Cc: Andrew Morton Cc: lkp@01.org Cc: Dmitry Vyukov Link: http://lkml.kernel.org/r/20170313163337.1704-1-aryabinin@virtuozzo.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/head64.c | 1 + arch/x86/mm/kasan_init_64.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 54a2372f5dbb..b5785c197e53 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -4,6 +4,7 @@ * Copyright (C) 2000 Andrea Arcangeli SuSE */ +#define DISABLE_BRANCH_PROFILING #include #include #include diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 0493c17b8a51..333362f992e4 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -1,3 +1,4 @@ +#define DISABLE_BRANCH_PROFILING #define pr_fmt(fmt) "kasan: " fmt #include #include From 9f9115b67aa5821e1d5490d94c1ad87d0396f7b6 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 16 Mar 2017 12:59:39 -0700 Subject: [PATCH 352/357] x86/perf: Fix CR4.PCE propagation to use active_mm instead of mm commit 5dc855d44c2ad960a86f593c60461f1ae1566b6d upstream. If one thread mmaps a perf event while another thread in the same mm is in some context where active_mm != mm (which can happen in the scheduler, for example), refresh_pce() would write the wrong value to CR4.PCE. This broke some PAPI tests. Reported-and-tested-by: Vince Weaver Signed-off-by: Andy Lutomirski Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Fixes: 7911d3f7af14 ("perf/x86: Only allow rdpmc if a perf_event is mapped") Link: http://lkml.kernel.org/r/0c5b38a76ea50e405f9abe07a13dfaef87c173a1.1489694270.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 7fe88bb57e36..38623e219816 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2096,8 +2096,8 @@ static int x86_pmu_event_init(struct perf_event *event) static void refresh_pce(void *ignored) { - if (current->mm) - load_mm_cr4(current->mm); + if (current->active_mm) + load_mm_cr4(current->active_mm); } static void x86_pmu_event_mapped(struct perf_event *event) From 6244ffc5a1221e593a937c7ad3c03a9ce691a8df Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 4 Mar 2017 10:27:18 +0100 Subject: [PATCH 353/357] futex: Fix potential use-after-free in FUTEX_REQUEUE_PI commit c236c8e95a3d395b0494e7108f0d41cf36ec107c upstream. While working on the futex code, I stumbled over this potential use-after-free scenario. Dmitry triggered it later with syzkaller. pi_mutex is a pointer into pi_state, which we drop the reference on in unqueue_me_pi(). So any access to that pointer after that is bad. Since other sites already do rt_mutex_unlock() with hb->lock held, see for example futex_lock_pi(), simply move the unlock before unqueue_me_pi(). Reported-by: Dmitry Vyukov Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Darren Hart Cc: juri.lelli@arm.com Cc: bigeasy@linutronix.de Cc: xlpang@redhat.com Cc: rostedt@goodmis.org Cc: mathieu.desnoyers@efficios.com Cc: jdesfossez@efficios.com Cc: dvhart@infradead.org Cc: bristot@redhat.com Link: http://lkml.kernel.org/r/20170304093558.801744246@infradead.org Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/futex.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 38b68c2735c5..aa45c4e13880 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2813,7 +2813,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, { struct hrtimer_sleeper timeout, *to = NULL; struct rt_mutex_waiter rt_waiter; - struct rt_mutex *pi_mutex = NULL; struct futex_hash_bucket *hb; union futex_key key2 = FUTEX_KEY_INIT; struct futex_q q = futex_q_init; @@ -2905,6 +2904,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, spin_unlock(q.lock_ptr); } } else { + struct rt_mutex *pi_mutex; + /* * We have been woken up by futex_unlock_pi(), a timeout, or a * signal. futex_unlock_pi() will not destroy the lock_ptr nor @@ -2928,18 +2929,19 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (res) ret = (res < 0) ? res : 0; + /* + * If fixup_pi_state_owner() faulted and was unable to handle + * the fault, unlock the rt_mutex and return the fault to + * userspace. + */ + if (ret && rt_mutex_owner(pi_mutex) == current) + rt_mutex_unlock(pi_mutex); + /* Unqueue and drop the lock. */ unqueue_me_pi(&q); } - /* - * If fixup_pi_state_owner() faulted and was unable to handle the - * fault, unlock the rt_mutex and return the fault to userspace. - */ - if (ret == -EFAULT) { - if (pi_mutex && rt_mutex_owner(pi_mutex) == current) - rt_mutex_unlock(pi_mutex); - } else if (ret == -EINTR) { + if (ret == -EINTR) { /* * We've already been requeued, but cannot restart by calling * futex_lock_pi() directly. We could restart this syscall, but From 1522181f4bc14a61c72981439fcbe9a87496f3cf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 4 Mar 2017 10:27:19 +0100 Subject: [PATCH 354/357] futex: Add missing error handling to FUTEX_REQUEUE_PI commit 9bbb25afeb182502ca4f2c4f3f88af0681b34cae upstream. Thomas spotted that fixup_pi_state_owner() can return errors and we fail to unlock the rt_mutex in that case. Reported-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Darren Hart Cc: juri.lelli@arm.com Cc: bigeasy@linutronix.de Cc: xlpang@redhat.com Cc: rostedt@goodmis.org Cc: mathieu.desnoyers@efficios.com Cc: jdesfossez@efficios.com Cc: dvhart@infradead.org Cc: bristot@redhat.com Link: http://lkml.kernel.org/r/20170304093558.867401760@infradead.org Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/futex.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/futex.c b/kernel/futex.c index aa45c4e13880..4c6b6e697b73 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2896,6 +2896,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (q.pi_state && (q.pi_state->owner != current)) { spin_lock(q.lock_ptr); ret = fixup_pi_state_owner(uaddr2, &q, current); + if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) + rt_mutex_unlock(&q.pi_state->pi_mutex); /* * Drop the reference to the pi state which * the requeue_pi() code acquired for us. From c40609652267440ae6f800303ea1f3be17d357e2 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Sat, 25 Feb 2017 01:17:53 +0100 Subject: [PATCH 355/357] locking/rwsem: Fix down_write_killable() for CONFIG_RWSEM_GENERIC_SPINLOCK=y commit 17fcbd590d0c3e35bd9646e2215f86586378bc42 upstream. We hang if SIGKILL has been sent, but the task is stuck in down_read() (after do_exit()), even though no task is doing down_write() on the rwsem in question: INFO: task libupnp:21868 blocked for more than 120 seconds. libupnp D 0 21868 1 0x08100008 ... Call Trace: __schedule() schedule() __down_read() do_exit() do_group_exit() __wake_up_parent() This bug has already been fixed for CONFIG_RWSEM_XCHGADD_ALGORITHM=y in the following commit: 04cafed7fc19 ("locking/rwsem: Fix down_write_killable()") ... however, this bug also exists for CONFIG_RWSEM_GENERIC_SPINLOCK=y. Signed-off-by: Niklas Cassel Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Andrew Morton Cc: Linus Torvalds Cc: Niklas Cassel Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: d47996082f52 ("locking/rwsem: Introduce basis for down_write_killable()") Link: http://lkml.kernel.org/r/1487981873-12649-1-git-send-email-niklass@axis.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/locking/rwsem-spinlock.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 1591f6b3539f..2bef4ab94003 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -216,10 +216,8 @@ int __sched __down_write_common(struct rw_semaphore *sem, int state) */ if (sem->count == 0) break; - if (signal_pending_state(state, current)) { - ret = -EINTR; - goto out; - } + if (signal_pending_state(state, current)) + goto out_nolock; set_task_state(tsk, state); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); schedule(); @@ -227,12 +225,19 @@ int __sched __down_write_common(struct rw_semaphore *sem, int state) } /* got the lock */ sem->count = -1; -out: list_del(&waiter.list); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); return ret; + +out_nolock: + list_del(&waiter.list); + if (!list_empty(&sem->wait_list)) + __rwsem_do_wake(sem, 1); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + + return -EINTR; } void __sched __down_write(struct rw_semaphore *sem) From ecdc5b12d7a01707b5b3f334cdf47f01e50d2ce3 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Fri, 3 Mar 2017 17:56:55 +1100 Subject: [PATCH 356/357] crypto: powerpc - Fix initialisation of crc32c context commit aa2be9b3d6d2d699e9ca7cbfc00867c80e5da213 upstream. Turning on crypto self-tests on a POWER8 shows: alg: hash: Test 1 failed for crc32c-vpmsum 00000000: ff ff ff ff Comparing the code with the Intel CRC32c implementation on which ours is based shows that we are doing an init with 0, not ~0 as CRC32c requires. This probably wasn't caught because btrfs does its own weird open-coded initialisation. Initialise our internal context to ~0 on init. This makes the self-tests pass, and btrfs continues to work. Fixes: 6dd7a82cc54e ("crypto: powerpc - Add POWER8 optimised crc32c") Cc: Anton Blanchard Signed-off-by: Daniel Axtens Acked-by: Anton Blanchard Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/crypto/crc32c-vpmsum_glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c index 9fa046d56eba..411994551afc 100644 --- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c +++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c @@ -52,7 +52,7 @@ static int crc32c_vpmsum_cra_init(struct crypto_tfm *tfm) { u32 *key = crypto_tfm_ctx(tfm); - *key = 0; + *key = ~0; return 0; } From c3825da143fc419e2639e602f62d793ed0de4657 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 22 Mar 2017 12:44:07 +0100 Subject: [PATCH 357/357] Linux 4.9.17 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4e0f962eb434..004f90a4e613 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 16 +SUBLEVEL = 17 EXTRAVERSION = NAME = Roaring Lionus