From 76267a8a19cdc40094796a2bd032eaf437774c35 Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Thu, 21 Dec 2017 21:10:36 -0500 Subject: [PATCH 01/29] KVM/Eventfd: Avoid crash when assign and deassign specific eventfd in parallel. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b5020a8e6b54d2ece80b1e7dedb33c79a40ebd47 upstream. Syzbot reports crashes in kvm_irqfd_assign(), caused by use-after-free when kvm_irqfd_assign() and kvm_irqfd_deassign() run in parallel for one specific eventfd. When the assign path hasn't finished but irqfd has been added to kvm->irqfds.items list, another thead may deassign the eventfd and free struct kvm_kernel_irqfd(). The assign path then uses the struct kvm_kernel_irqfd that has been freed by deassign path. To avoid such issue, keep irqfd under kvm->irq_srcu protection after the irqfd has been added to kvm->irqfds.items list, and call synchronize_srcu() in irq_shutdown() to make sure that irqfd has been fully initialized in the assign path. Reported-by: Dmitry Vyukov Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Dmitry Vyukov Signed-off-by: Tianyu Lan Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- virt/kvm/eventfd.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 4d28a9ddbee0..3f24eb1e8554 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -119,8 +119,12 @@ irqfd_shutdown(struct work_struct *work) { struct kvm_kernel_irqfd *irqfd = container_of(work, struct kvm_kernel_irqfd, shutdown); + struct kvm *kvm = irqfd->kvm; u64 cnt; + /* Make sure irqfd has been initalized in assign path. */ + synchronize_srcu(&kvm->irq_srcu); + /* * Synchronize with the wait-queue and unhook ourselves to prevent * further events. @@ -387,7 +391,6 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) idx = srcu_read_lock(&kvm->irq_srcu); irqfd_update(kvm, irqfd); - srcu_read_unlock(&kvm->irq_srcu, idx); list_add_tail(&irqfd->list, &kvm->irqfds.items); @@ -421,6 +424,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) } #endif + srcu_read_unlock(&kvm->irq_srcu, idx); return 0; fail: From 6ac85d22332118d2dc449536110a31a5ca3956b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 9 Jul 2018 16:35:34 +0300 Subject: [PATCH 02/29] x86/apm: Don't access __preempt_count with zeroed fs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6f6060a5c9cc76fdbc22748264e6aa3779ec2427 upstream. APM_DO_POP_SEGS does not restore fs/gs which were zeroed by APM_DO_ZERO_SEGS. Trying to access __preempt_count with zeroed fs doesn't really work. Move the ibrs call outside the APM_DO_SAVE_SEGS/APM_DO_RESTORE_SEGS invocations so that fs is actually restored before calling preempt_enable(). Fixes the following sort of oopses: [ 0.313581] general protection fault: 0000 [#1] PREEMPT SMP [ 0.313803] Modules linked in: [ 0.314040] CPU: 0 PID: 268 Comm: kapmd Not tainted 4.16.0-rc1-triton-bisect-00090-gdd84441a7971 #19 [ 0.316161] EIP: __apm_bios_call_simple+0xc8/0x170 [ 0.316161] EFLAGS: 00210016 CPU: 0 [ 0.316161] EAX: 00000102 EBX: 00000000 ECX: 00000102 EDX: 00000000 [ 0.316161] ESI: 0000530e EDI: dea95f64 EBP: dea95f18 ESP: dea95ef0 [ 0.316161] DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068 [ 0.316161] CR0: 80050033 CR2: 00000000 CR3: 015d3000 CR4: 000006d0 [ 0.316161] Call Trace: [ 0.316161] ? cpumask_weight.constprop.15+0x20/0x20 [ 0.316161] on_cpu0+0x44/0x70 [ 0.316161] apm+0x54e/0x720 [ 0.316161] ? __switch_to_asm+0x26/0x40 [ 0.316161] ? __schedule+0x17d/0x590 [ 0.316161] kthread+0xc0/0xf0 [ 0.316161] ? proc_apm_show+0x150/0x150 [ 0.316161] ? kthread_create_worker_on_cpu+0x20/0x20 [ 0.316161] ret_from_fork+0x2e/0x38 [ 0.316161] Code: da 8e c2 8e e2 8e ea 57 55 2e ff 1d e0 bb 5d b1 0f 92 c3 5d 5f 07 1f 89 47 0c 90 8d b4 26 00 00 00 00 90 8d b4 26 00 00 00 00 90 <64> ff 0d 84 16 5c b1 74 7f 8b 45 dc 8e e0 8b 45 d8 8e e8 8b 45 [ 0.316161] EIP: __apm_bios_call_simple+0xc8/0x170 SS:ESP: 0068:dea95ef0 [ 0.316161] ---[ end trace 656253db2deaa12c ]--- Fixes: dd84441a7971 ("x86/speculation: Use IBRS if available before calling into firmware") Signed-off-by: Ville Syrjälä Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Cc: David Woodhouse Cc: "H. Peter Anvin" Cc: x86@kernel.org Cc: David Woodhouse Cc: "H. Peter Anvin" Link: https://lkml.kernel.org/r/20180709133534.5963-1-ville.syrjala@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/apm.h | 6 ------ arch/x86/kernel/apm_32.c | 5 +++++ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h index 46e40aeae446..93eebc636c76 100644 --- a/arch/x86/include/asm/apm.h +++ b/arch/x86/include/asm/apm.h @@ -6,8 +6,6 @@ #ifndef _ASM_X86_MACH_DEFAULT_APM_H #define _ASM_X86_MACH_DEFAULT_APM_H -#include - #ifdef APM_ZERO_SEGS # define APM_DO_ZERO_SEGS \ "pushl %%ds\n\t" \ @@ -33,7 +31,6 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. */ - firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -46,7 +43,6 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, "=S" (*esi) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); - firmware_restrict_branch_speculation_end(); } static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, @@ -59,7 +55,6 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. */ - firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -72,7 +67,6 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, "=S" (si) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); - firmware_restrict_branch_speculation_end(); return error; } diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 51287cd90bf6..313a85a7b222 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -239,6 +239,7 @@ #include #include #include +#include #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) extern int (*console_blank_hook)(int); @@ -613,11 +614,13 @@ static long __apm_bios_call(void *_call) gdt[0x40 / 8] = bad_bios_desc; apm_irq_save(flags); + firmware_restrict_branch_speculation_start(); APM_DO_SAVE_SEGS; apm_bios_call_asm(call->func, call->ebx, call->ecx, &call->eax, &call->ebx, &call->ecx, &call->edx, &call->esi); APM_DO_RESTORE_SEGS; + firmware_restrict_branch_speculation_end(); apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; put_cpu(); @@ -689,10 +692,12 @@ static long __apm_bios_call_simple(void *_call) gdt[0x40 / 8] = bad_bios_desc; apm_irq_save(flags); + firmware_restrict_branch_speculation_start(); APM_DO_SAVE_SEGS; error = apm_bios_call_simple_asm(call->func, call->ebx, call->ecx, &call->eax); APM_DO_RESTORE_SEGS; + firmware_restrict_branch_speculation_end(); apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; put_cpu(); From 91b6b9d0bf39443d708352c193c597a01e9b0004 Mon Sep 17 00:00:00 2001 From: Dewet Thibaut Date: Mon, 16 Jul 2018 10:49:27 +0200 Subject: [PATCH 03/29] x86/MCE: Remove min interval polling limitation commit fbdb328c6bae0a7c78d75734a738b66b86dffc96 upstream. commit b3b7c4795c ("x86/MCE: Serialize sysfs changes") introduced a min interval limitation when setting the check interval for polled MCEs. However, the logic is that 0 disables polling for corrected MCEs, see Documentation/x86/x86_64/machinecheck. The limitation prevents disabling. Remove this limitation and allow the value 0 to disable polling again. Fixes: b3b7c4795c ("x86/MCE: Serialize sysfs changes") Signed-off-by: Dewet Thibaut Signed-off-by: Alexander Sverdlin [ Massage commit message. ] Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20180716084927.24869-1-alexander.sverdlin@nokia.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mcheck/mce.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index c49e146d4332..7e6163c9d434 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -2397,9 +2397,6 @@ static ssize_t store_int_with_restart(struct device *s, if (check_interval == old_check_interval) return ret; - if (check_interval < 1) - check_interval = 1; - mutex_lock(&mce_sysfs_mutex); mce_restart(); mutex_unlock(&mce_sysfs_mutex); From 6fc87cc95b4de93348821f7a23c927b58deab593 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Fri, 20 Jul 2018 17:53:42 -0700 Subject: [PATCH 04/29] fat: fix memory allocation failure handling of match_strdup() commit 35033ab988c396ad7bce3b6d24060c16a9066db8 upstream. In parse_options(), if match_strdup() failed, parse_options() leaves opts->iocharset in unexpected state (i.e. still pointing the freed string). And this can be the cause of double free. To fix, this initialize opts->iocharset always when freeing. Link: http://lkml.kernel.org/r/8736wp9dzc.fsf@mail.parknet.co.jp Signed-off-by: OGAWA Hirofumi Reported-by: syzbot+90b8e10515ae88228a92@syzkaller.appspotmail.com Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/fat/inode.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index a2c05f2ada6d..88720011a6eb 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -696,13 +696,21 @@ static void fat_set_state(struct super_block *sb, brelse(bh); } +static void fat_reset_iocharset(struct fat_mount_options *opts) +{ + if (opts->iocharset != fat_default_iocharset) { + /* Note: opts->iocharset can be NULL here */ + kfree(opts->iocharset); + opts->iocharset = fat_default_iocharset; + } +} + static void delayed_free(struct rcu_head *p) { struct msdos_sb_info *sbi = container_of(p, struct msdos_sb_info, rcu); unload_nls(sbi->nls_disk); unload_nls(sbi->nls_io); - if (sbi->options.iocharset != fat_default_iocharset) - kfree(sbi->options.iocharset); + fat_reset_iocharset(&sbi->options); kfree(sbi); } @@ -1117,7 +1125,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, opts->fs_fmask = opts->fs_dmask = current_umask(); opts->allow_utime = -1; opts->codepage = fat_default_codepage; - opts->iocharset = fat_default_iocharset; + fat_reset_iocharset(opts); if (is_vfat) { opts->shortname = VFAT_SFN_DISPLAY_WINNT|VFAT_SFN_CREATE_WIN95; opts->rodir = 0; @@ -1274,8 +1282,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, /* vfat specific */ case Opt_charset: - if (opts->iocharset != fat_default_iocharset) - kfree(opts->iocharset); + fat_reset_iocharset(opts); iocharset = match_strdup(&args[0]); if (!iocharset) return -ENOMEM; @@ -1866,8 +1873,7 @@ out_fail: iput(fat_inode); unload_nls(sbi->nls_io); unload_nls(sbi->nls_disk); - if (sbi->options.iocharset != fat_default_iocharset) - kfree(sbi->options.iocharset); + fat_reset_iocharset(&sbi->options); sb->s_fs_info = NULL; kfree(sbi); return error; From c4f094deb3d69dcc8b4e3dc6c056c1e62a72c33e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 17 Jul 2018 17:26:43 +0200 Subject: [PATCH 05/29] ALSA: rawmidi: Change resized buffers atomically commit 39675f7a7c7e7702f7d5341f1e0d01db746543a0 upstream. The SNDRV_RAWMIDI_IOCTL_PARAMS ioctl may resize the buffers and the current code is racy. For example, the sequencer client may write to buffer while it being resized. As a simple workaround, let's switch to the resized buffer inside the stream runtime lock. Reported-by: syzbot+52f83f0ea8df16932f7f@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/rawmidi.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 16f8124b1150..59111cadaec2 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -635,7 +635,7 @@ static int snd_rawmidi_info_select_user(struct snd_card *card, int snd_rawmidi_output_params(struct snd_rawmidi_substream *substream, struct snd_rawmidi_params * params) { - char *newbuf; + char *newbuf, *oldbuf; struct snd_rawmidi_runtime *runtime = substream->runtime; if (substream->append && substream->use_count > 1) @@ -648,13 +648,17 @@ int snd_rawmidi_output_params(struct snd_rawmidi_substream *substream, return -EINVAL; } if (params->buffer_size != runtime->buffer_size) { - newbuf = krealloc(runtime->buffer, params->buffer_size, - GFP_KERNEL); + newbuf = kmalloc(params->buffer_size, GFP_KERNEL); if (!newbuf) return -ENOMEM; + spin_lock_irq(&runtime->lock); + oldbuf = runtime->buffer; runtime->buffer = newbuf; runtime->buffer_size = params->buffer_size; runtime->avail = runtime->buffer_size; + runtime->appl_ptr = runtime->hw_ptr = 0; + spin_unlock_irq(&runtime->lock); + kfree(oldbuf); } runtime->avail_min = params->avail_min; substream->active_sensing = !params->no_active_sensing; @@ -665,7 +669,7 @@ EXPORT_SYMBOL(snd_rawmidi_output_params); int snd_rawmidi_input_params(struct snd_rawmidi_substream *substream, struct snd_rawmidi_params * params) { - char *newbuf; + char *newbuf, *oldbuf; struct snd_rawmidi_runtime *runtime = substream->runtime; snd_rawmidi_drain_input(substream); @@ -676,12 +680,16 @@ int snd_rawmidi_input_params(struct snd_rawmidi_substream *substream, return -EINVAL; } if (params->buffer_size != runtime->buffer_size) { - newbuf = krealloc(runtime->buffer, params->buffer_size, - GFP_KERNEL); + newbuf = kmalloc(params->buffer_size, GFP_KERNEL); if (!newbuf) return -ENOMEM; + spin_lock_irq(&runtime->lock); + oldbuf = runtime->buffer; runtime->buffer = newbuf; runtime->buffer_size = params->buffer_size; + runtime->appl_ptr = runtime->hw_ptr = 0; + spin_unlock_irq(&runtime->lock); + kfree(oldbuf); } runtime->avail_min = params->avail_min; return 0; From 3a80fb0d7752e6ae163d72736b9401076c7b75c9 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 28 Jun 2018 16:59:14 -0700 Subject: [PATCH 06/29] ARC: Fix CONFIG_SWAP commit 6e3761145a9ba3ce267c330b6bff51cf6a057b06 upstream. swap was broken on ARC due to silly copy-paste issue. We encode offset from swapcache page in __swp_entry() as (off << 13) but were not decoding back in __swp_offset() as (off >> 13) - it was still (off << 13). This finally fixes swap usage on ARC. | # mkswap /dev/sda2 | | # swapon -a -e /dev/sda2 | Adding 500728k swap on /dev/sda2. Priority:-2 extents:1 across:500728k | | # free | total used free shared buffers cached | Mem: 765104 13456 751648 4736 8 4736 | -/+ buffers/cache: 8712 756392 | Swap: 500728 0 500728 Cc: stable@vger.kernel.org Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index e94ca72b974e..c10f5cb203e6 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -378,7 +378,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, /* Decode a PTE containing swap "identifier "into constituents */ #define __swp_type(pte_lookalike) (((pte_lookalike).val) & 0x1f) -#define __swp_offset(pte_lookalike) ((pte_lookalike).val << 13) +#define __swp_offset(pte_lookalike) ((pte_lookalike).val >> 13) /* NOPs, to keep generic kernel happy */ #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) From 2ee7d6f173308488c4ea190cbedd435b2a166e46 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 11 Jul 2018 10:42:20 -0700 Subject: [PATCH 07/29] ARC: mm: allow mprotect to make stack mappings executable commit 93312b6da4df31e4102ce5420e6217135a16c7ea upstream. mprotect(EXEC) was failing for stack mappings as default vm flags was missing MAYEXEC. This was triggered by glibc test suite nptl/tst-execstack testcase What is surprising is that despite running LTP for years on, we didn't catch this issue as it lacks a directed test case. gcc dejagnu tests with nested functions also requiring exec stack work fine though because they rely on the GNU_STACK segment spit out by compiler and handled in kernel elf loader. This glibc case is different as the stack is non exec to begin with and a dlopen of shared lib with GNU_STACK segment triggers the exec stack proceedings using a mprotect(PROT_EXEC) which was broken. CC: stable@vger.kernel.org Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/page.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 296c3426a6ad..ffb5f33475f1 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -105,7 +105,7 @@ typedef pte_t * pgtable_t; #define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) /* Default Permissions for stack/heaps pages (Non Executable) */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE) +#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #define WANT_PAGE_VIRTUAL 1 From 1be686fe508d32521efd4fa7bcda581cb97813e0 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Wed, 6 Jun 2018 15:59:38 +0300 Subject: [PATCH 08/29] ARC: configs: Remove CONFIG_INITRAMFS_SOURCE from defconfigs commit 64234961c145606b36eaa82c47b11be842b21049 upstream. We used to have pre-set CONFIG_INITRAMFS_SOURCE with local path to intramfs in ARC defconfigs. This was quite convenient for in-house development but not that convenient for newcomers who obviusly don't have folders like "arc_initramfs" next to the Linux source tree. Which leads to quite surprising failure of defconfig building: ------------------------------->8----------------------------- ../scripts/gen_initramfs_list.sh: Cannot open '../../arc_initramfs_hs/' ../usr/Makefile:57: recipe for target 'usr/initramfs_data.cpio.gz' failed make[2]: *** [usr/initramfs_data.cpio.gz] Error 1 ------------------------------->8----------------------------- So now when more and more people start to deal with our defconfigs let's make their life easier with removal of CONFIG_INITRAMFS_SOURCE. Signed-off-by: Alexey Brodkin Cc: Kevin Hilman Cc: stable@vger.kernel.org Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/configs/axs101_defconfig | 1 - arch/arc/configs/axs103_defconfig | 1 - arch/arc/configs/axs103_smp_defconfig | 1 - arch/arc/configs/nsim_700_defconfig | 1 - arch/arc/configs/nsim_hs_defconfig | 1 - arch/arc/configs/nsim_hs_smp_defconfig | 1 - arch/arc/configs/nsimosci_defconfig | 1 - arch/arc/configs/nsimosci_hs_defconfig | 1 - arch/arc/configs/nsimosci_hs_smp_defconfig | 1 - 9 files changed, 9 deletions(-) diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index 0a0eaf09aac7..5367fe36b69d 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../arc_initramfs/" CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index 2233f5777a71..d40fad485982 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/" CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index 110874705085..06cbd27ef860 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/" CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig index b0066a749d4c..df609fce999b 100644 --- a/arch/arc/configs/nsim_700_defconfig +++ b/arch/arc/configs/nsim_700_defconfig @@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../arc_initramfs/" CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig index ebe9ebb92933..1dbb661b4c86 100644 --- a/arch/arc/configs/nsim_hs_defconfig +++ b/arch/arc/configs/nsim_hs_defconfig @@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/" CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig index 4bde43278be6..cb36a69e5ed1 100644 --- a/arch/arc/configs/nsim_hs_smp_defconfig +++ b/arch/arc/configs/nsim_hs_smp_defconfig @@ -9,7 +9,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/" CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig index f6fb3d26557e..5680daa65471 100644 --- a/arch/arc/configs/nsimosci_defconfig +++ b/arch/arc/configs/nsimosci_defconfig @@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../arc_initramfs/" CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig index b9f0fe00044b..87decc491c58 100644 --- a/arch/arc/configs/nsimosci_hs_defconfig +++ b/arch/arc/configs/nsimosci_hs_defconfig @@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/" CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig index 6da71ba253a9..4d14684dc74a 100644 --- a/arch/arc/configs/nsimosci_hs_smp_defconfig +++ b/arch/arc/configs/nsimosci_hs_smp_defconfig @@ -9,7 +9,6 @@ CONFIG_IKCONFIG_PROC=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/" CONFIG_PERF_EVENTS=y # CONFIG_COMPAT_BRK is not set CONFIG_KPROBES=y From f46b054eca657260236fca84748dc8946b0a24f3 Mon Sep 17 00:00:00 2001 From: Jing Xia Date: Fri, 20 Jul 2018 17:53:48 -0700 Subject: [PATCH 09/29] mm: memcg: fix use after free in mem_cgroup_iter() commit 9f15bde671355c351cf20d9f879004b234353100 upstream. It was reported that a kernel crash happened in mem_cgroup_iter(), which can be triggered if the legacy cgroup-v1 non-hierarchical mode is used. Unable to handle kernel paging request at virtual address 6b6b6b6b6b6b8f ...... Call trace: mem_cgroup_iter+0x2e0/0x6d4 shrink_zone+0x8c/0x324 balance_pgdat+0x450/0x640 kswapd+0x130/0x4b8 kthread+0xe8/0xfc ret_from_fork+0x10/0x20 mem_cgroup_iter(): ...... if (css_tryget(css)) <-- crash here break; ...... The crashing reason is that mem_cgroup_iter() uses the memcg object whose pointer is stored in iter->position, which has been freed before and filled with POISON_FREE(0x6b). And the root cause of the use-after-free issue is that invalidate_reclaim_iterators() fails to reset the value of iter->position to NULL when the css of the memcg is released in non- hierarchical mode. Link: http://lkml.kernel.org/r/1531994807-25639-1-git-send-email-jing.xia@unisoc.com Fixes: 6df38689e0e9 ("mm: memcontrol: fix possible memcg leak due to interrupted reclaim") Signed-off-by: Jing Xia Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Cc: Cc: Shakeel Butt Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 50088150fc17..349f4a8e3c4f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -895,7 +895,7 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg) int nid; int i; - while ((memcg = parent_mem_cgroup(memcg))) { + for (; memcg; memcg = parent_mem_cgroup(memcg)) { for_each_node(nid) { mz = mem_cgroup_nodeinfo(memcg, nid); for (i = 0; i <= DEF_PRIORITY; i++) { From 3472e37379f15a1a19c4ee66b14e56e07613a309 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 20 Jul 2018 17:53:45 -0700 Subject: [PATCH 10/29] mm/huge_memory.c: fix data loss when splitting a file pmd commit e1f1b1572e8db87a56609fd05bef76f98f0e456a upstream. __split_huge_pmd_locked() must check if the cleared huge pmd was dirty, and propagate that to PageDirty: otherwise, data may be lost when a huge tmpfs page is modified then split then reclaimed. How has this taken so long to be noticed? Because there was no problem when the huge page is written by a write system call (shmem_write_end() calls set_page_dirty()), nor when the page is allocated for a write fault (fault_dirty_shared_page() calls set_page_dirty()); but when allocated for a read fault (which MAP_POPULATE simulates), no set_page_dirty(). Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1807111741430.1106@eggly.anvils Fixes: d21b9e57c74c ("thp: handle file pages in split_huge_pmd()") Signed-off-by: Hugh Dickins Reported-by: Ashwin Chaugule Reviewed-by: Yang Shi Reviewed-by: Kirill A. Shutemov Cc: "Huang, Ying" Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 724372866e67..9efe88ef9702 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1642,6 +1642,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, if (vma_is_dax(vma)) return; page = pmd_page(_pmd); + if (!PageDirty(page) && pmd_dirty(_pmd)) + set_page_dirty(page); if (!PageReferenced(page) && pmd_young(_pmd)) SetPageReferenced(page); page_remove_rmap(page, true); From 40974672ae870e02b781bf060b04688cf922e0f9 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 17 Jul 2018 12:39:00 -0500 Subject: [PATCH 11/29] vfio/pci: Fix potential Spectre v1 commit 0e714d27786ce1fb3efa9aac58abc096e68b1c2a upstream. info.index can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/vfio/pci/vfio_pci.c:734 vfio_pci_ioctl() warn: potential spectre issue 'vdev->region' Fix this by sanitizing info.index before indirectly using it to index vdev->region Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/pci/vfio_pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 43559bed7822..7338e43faa17 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "vfio_pci_private.h" @@ -755,6 +756,9 @@ static long vfio_pci_ioctl(void *device_data, if (info.index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions) return -EINVAL; + info.index = array_index_nospec(info.index, + VFIO_PCI_NUM_REGIONS + + vdev->num_regions); i = info.index - VFIO_PCI_NUM_REGIONS; From 1e02c4f403c0b7f53e9b31ca76352e5351163c7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 14 Jun 2018 20:56:25 +0300 Subject: [PATCH 12/29] drm/i915: Fix hotplug irq ack on i965/g4x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 96a85cc517a9ee4ae5e8d7f5a36cba05023784eb upstream. Just like with PIPESTAT, the edge triggered IIR on i965/g4x also causes problems for hotplug interrupts. To make sure we don't get the IIR port interrupt bit stuck low with the ISR bit high we must force an edge in ISR. Unfortunately we can't borrow the PIPESTAT trick and toggle the enable bits in PORT_HOTPLUG_EN as that act itself generates hotplug interrupts. Instead we just have to loop until we've cleared PORT_HOTPLUG_STAT, or we just give up and WARN. v2: Don't frob with PORT_HOTPLUG_EN Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180614175625.1615-1-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak (cherry picked from commit 0ba7c51a6fd80a89236f6ceb52e63f8a7f62bfd3) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_irq.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 02908e37c228..279d1e021421 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1684,10 +1684,38 @@ static void valleyview_pipestat_irq_handler(struct drm_i915_private *dev_priv, static u32 i9xx_hpd_irq_ack(struct drm_i915_private *dev_priv) { - u32 hotplug_status = I915_READ(PORT_HOTPLUG_STAT); + u32 hotplug_status = 0, hotplug_status_mask; + int i; - if (hotplug_status) + if (IS_G4X(dev_priv) || + IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + hotplug_status_mask = HOTPLUG_INT_STATUS_G4X | + DP_AUX_CHANNEL_MASK_INT_STATUS_G4X; + else + hotplug_status_mask = HOTPLUG_INT_STATUS_I915; + + /* + * We absolutely have to clear all the pending interrupt + * bits in PORT_HOTPLUG_STAT. Otherwise the ISR port + * interrupt bit won't have an edge, and the i965/g4x + * edge triggered IIR will not notice that an interrupt + * is still pending. We can't use PORT_HOTPLUG_EN to + * guarantee the edge as the act of toggling the enable + * bits can itself generate a new hotplug interrupt :( + */ + for (i = 0; i < 10; i++) { + u32 tmp = I915_READ(PORT_HOTPLUG_STAT) & hotplug_status_mask; + + if (tmp == 0) + return hotplug_status; + + hotplug_status |= tmp; I915_WRITE(PORT_HOTPLUG_STAT, hotplug_status); + } + + WARN_ONCE(1, + "PORT_HOTPLUG_STAT did not clear (0x%08x)\n", + I915_READ(PORT_HOTPLUG_STAT)); return hotplug_status; } From ec6a6039d7323abd1f28dafcf435700f072a1537 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Mon, 2 Jul 2018 22:52:20 +0200 Subject: [PATCH 13/29] gen_stats: Fix netlink stats dumping in the presence of padding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d5a672ac9f48f81b20b1cad1d9ed7bbf4e418d4c ] The gen_stats facility will add a header for the toplevel nlattr of type TCA_STATS2 that contains all stats added by qdisc callbacks. A reference to this header is stored in the gnet_dump struct, and when all the per-qdisc callbacks have finished adding their stats, the length of the containing header will be adjusted to the right value. However, on architectures that need padding (i.e., that don't set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS), the padding nlattr is added before the stats, which means that the stored pointer will point to the padding, and so when the header is fixed up, the result is just a very big padding nlattr. Because most qdiscs also supply the legacy TCA_STATS struct, this problem has been mostly invisible, but we exposed it with the netlink attribute-based statistics in CAKE. Fix the issue by fixing up the stored pointer if it points to a padding nlattr. Tested-by: Pete Heist Tested-by: Kevin Darbyshire-Bryant Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/gen_stats.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 508e051304fb..18f17e1e5762 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -77,8 +77,20 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type, d->lock = lock; spin_lock_bh(lock); } - if (d->tail) - return gnet_stats_copy(d, type, NULL, 0, padattr); + if (d->tail) { + int ret = gnet_stats_copy(d, type, NULL, 0, padattr); + + /* The initial attribute added in gnet_stats_copy() may be + * preceded by a padding attribute, in which case d->tail will + * end up pointing at the padding instead of the real attribute. + * Fix this so gnet_stats_finish_copy() adjusts the length of + * the right attribute. + */ + if (ret == 0 && d->tail->nla_type == padattr) + d->tail = (struct nlattr *)((char *)d->tail + + NLA_ALIGN(d->tail->nla_len)); + return ret; + } return 0; } From 79870c6c659713772c8969fd8c21dcba8fd253be Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Thu, 5 Jul 2018 18:49:23 +0000 Subject: [PATCH 14/29] ipv4: Return EINVAL when ping_group_range sysctl doesn't map to user ns [ Upstream commit 70ba5b6db96ff7324b8cfc87e0d0383cf59c9677 ] The low and high values of the net.ipv4.ping_group_range sysctl were being silently forced to the default disabled state when a write to the sysctl contained GIDs that didn't map to the associated user namespace. Confusingly, the sysctl's write operation would return success and then a subsequent read of the sysctl would indicate that the low and high values are the overflowgid. This patch changes the behavior by clearly returning an error when the sysctl write operation receives a GID range that doesn't map to the associated user namespace. In such a situation, the previous value of the sysctl is preserved and that range will be returned in a subsequent read of the sysctl. Signed-off-by: Tyler Hicks Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/sysctl_net_ipv4.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 51a0039cb318..024ab833557d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -140,8 +140,9 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write, if (write && ret == 0) { low = make_kgid(user_ns, urange[0]); high = make_kgid(user_ns, urange[1]); - if (!gid_valid(low) || !gid_valid(high) || - (urange[1] < urange[0]) || gid_lt(high, low)) { + if (!gid_valid(low) || !gid_valid(high)) + return -EINVAL; + if (urange[1] < urange[0] || gid_lt(high, low)) { low = make_kgid(&init_user_ns, 1); high = make_kgid(&init_user_ns, 0); } From 8582bbfb8629d0dd7f29db5091e308bfcaf7af16 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 17 Jul 2018 17:12:39 +0100 Subject: [PATCH 15/29] ipv6: fix useless rol32 call on hash [ Upstream commit 169dc027fb02492ea37a0575db6a658cf922b854 ] The rol32 call is currently rotating hash but the rol'd value is being discarded. I believe the current code is incorrect and hash should be assigned the rotated value returned from rol32. Thanks to David Lebrun for spotting this. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ipv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index e64210c98c2b..407087d686a7 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -794,7 +794,7 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, * to minimize possbility that any useful information to an * attacker is leaked. Only lower 20 bits are relevant. */ - rol32(hash, 16); + hash = rol32(hash, 16); flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK; From 09ae0085ceb613fa94d82bc8bba5b5b9e2b9b2f6 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 16 Jul 2018 13:26:13 -0700 Subject: [PATCH 16/29] lib/rhashtable: consider param->min_size when setting initial table size [ Upstream commit 107d01f5ba10f4162c38109496607eb197059064 ] rhashtable_init() currently does not take into account the user-passed min_size parameter unless param->nelem_hint is set as well. As such, the default size (number of buckets) will always be HASH_DEFAULT_SIZE even if the smallest allowed size is larger than that. Remediate this by unconditionally calling into rounded_hashtable_size() and handling things accordingly. Signed-off-by: Davidlohr Bueso Acked-by: Herbert Xu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- lib/rhashtable.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 895961c53385..101dac085c62 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -783,8 +783,16 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_stop); static size_t rounded_hashtable_size(const struct rhashtable_params *params) { - return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), - (unsigned long)params->min_size); + size_t retsize; + + if (params->nelem_hint) + retsize = max(roundup_pow_of_two(params->nelem_hint * 4 / 3), + (unsigned long)params->min_size); + else + retsize = max(HASH_DEFAULT_SIZE, + (unsigned long)params->min_size); + + return retsize; } static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed) @@ -841,8 +849,6 @@ int rhashtable_init(struct rhashtable *ht, struct bucket_table *tbl; size_t size; - size = HASH_DEFAULT_SIZE; - if ((!params->key_len && !params->obj_hashfn) || (params->obj_hashfn && !params->obj_cmpfn)) return -EINVAL; @@ -869,8 +875,7 @@ int rhashtable_init(struct rhashtable *ht, ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE); - if (params->nelem_hint) - size = rounded_hashtable_size(&ht->p); + size = rounded_hashtable_size(&ht->p); /* The maximum (not average) chain length grows with the * size of the hash table, at a rate of (log N)/(log log N). From 66a7cfa05740341e6cceab4694b4ace7ce88475f Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Sat, 7 Jul 2018 16:31:40 +0900 Subject: [PATCH 17/29] net: diag: Don't double-free TCP_NEW_SYN_RECV sockets in tcp_abort [ Upstream commit acc2cf4e37174646a24cba42fa53c668b2338d4e ] When tcp_diag_destroy closes a TCP_NEW_SYN_RECV socket, it first frees it by calling inet_csk_reqsk_queue_drop_and_and_put in tcp_abort, and then frees it again by calling sock_gen_put. Since tcp_abort only has one caller, and all the other codepaths in tcp_abort don't free the socket, just remove the free in that function. Cc: David Ahern Tested: passes Android sock_diag_test.py, which exercises this codepath Fixes: d7226c7a4dd1 ("net: diag: Fix refcnt leak in error path destroying socket") Signed-off-by: Lorenzo Colitti Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Tested-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 84ffebf0192d..8f15eae3325b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3238,8 +3238,7 @@ int tcp_abort(struct sock *sk, int err) struct request_sock *req = inet_reqsk(sk); local_bh_disable(); - inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, - req); + inet_csk_reqsk_queue_drop(req->rsk_listener, req); local_bh_enable(); return 0; } From f08ca4c8b423c3cbfc254b73d1c12e622bd63c10 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 7 Jul 2018 16:15:26 -0700 Subject: [PATCH 18/29] net/ipv4: Set oif in fib_compute_spec_dst [ Upstream commit e7372197e15856ec4ee66b668020a662994db103 ] Xin reported that icmp replies may not use the address on the device the echo request is received if the destination address is broadcast. Instead a route lookup is done without considering VRF context. Fix by setting oif in flow struct to the master device if it is enslaved. That directs the lookup to the VRF table. If the device is not enslaved, oif is still 0 so no affect. Fixes: cd2fbe1b6b51 ("net: Use VRF device index for lookups on RX") Reported-by: Xin Long Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_frontend.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index ffae472e250a..7bdd89354db5 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -290,6 +290,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) { struct flowi4 fl4 = { .flowi4_iif = LOOPBACK_IFINDEX, + .flowi4_oif = l3mdev_master_ifindex_rcu(dev), .daddr = ip_hdr(skb)->saddr, .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), .flowi4_scope = scope, From 77befb4bd1083bb2001851c6a4439b2467055423 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 3 Jul 2018 22:34:54 +0200 Subject: [PATCH 19/29] net: phy: fix flag masking in __set_phy_supported [ Upstream commit df8ed346d4a806a6eef2db5924285e839604b3f9 ] Currently also the pause flags are removed from phydev->supported because they're not included in PHY_DEFAULT_FEATURES. I don't think this is intended, especially when considering that this function can be called via phy_set_max_speed() anywhere in a driver. Change the masking to mask out only the values we're going to change. In addition remove the misleading comment, job of this small function is just to adjust the supported and advertised speeds. Fixes: f3a6bd393c2c ("phylib: Add phy_set_max_speed helper") Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy_device.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index bf02f8e4648a..b131e555d3c2 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1579,11 +1579,8 @@ static int gen10g_resume(struct phy_device *phydev) static int __set_phy_supported(struct phy_device *phydev, u32 max_speed) { - /* The default values for phydev->supported are provided by the PHY - * driver "features" member, we want to reset to sane defaults first - * before supporting higher speeds. - */ - phydev->supported &= PHY_DEFAULT_FEATURES; + phydev->supported &= ~(PHY_1000BT_FEATURES | PHY_100BT_FEATURES | + PHY_10BT_FEATURES); switch (max_speed) { default: From 5ac2bc675cfc0c77da9a1dcd72b6340acbcd8a29 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 17 Jul 2018 20:17:33 -0500 Subject: [PATCH 20/29] ptp: fix missing break in switch [ Upstream commit 9ba8376ce1e2cbf4ce44f7e4bee1d0648e10d594 ] It seems that a *break* is missing in order to avoid falling through to the default case. Otherwise, checking *chan* makes no sense. Fixes: 72df7a7244c0 ("ptp: Allow reassigning calibration pin function") Signed-off-by: Gustavo A. R. Silva Acked-by: Richard Cochran Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/ptp/ptp_chardev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 58a97d420572..51364621f77c 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -89,6 +89,7 @@ int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin, case PTP_PF_PHYSYNC: if (chan != 0) return -EINVAL; + break; default: return -EINVAL; } From 323bbb17d4905992c14e5512e2e8b93d29f060cc Mon Sep 17 00:00:00 2001 From: Matevz Vucnik Date: Wed, 4 Jul 2018 18:12:48 +0200 Subject: [PATCH 21/29] qmi_wwan: add support for Quectel EG91 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 38cd58ed9c4e389799b507bcffe02a7a7a180b33 ] This adds the USB id of LTE modem Quectel EG91. It requires the same quirk as other Quectel modems to make it work. Signed-off-by: Matevz Vucnik Acked-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 6d654d65f8a0..31a6d87b61b2 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -953,6 +953,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1e0e, 0x9001, 5)}, /* SIMCom 7230E */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0125, 4)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */ + {QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)}, /* Quectel EG91 */ {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0306, 4)}, /* Quectel EP06 Mini PCIe */ From dd08f4e69154625200efa7a239ba1147b2d04db9 Mon Sep 17 00:00:00 2001 From: Sanjeev Bansal Date: Mon, 16 Jul 2018 11:13:32 +0530 Subject: [PATCH 22/29] tg3: Add higher cpu clock for 5762. [ Upstream commit 3a498606bb04af603a46ebde8296040b2de350d1 ] This patch has fix for TX timeout while running bi-directional traffic with 100 Mbps using 5762. Signed-off-by: Sanjeev Bansal Signed-off-by: Siva Reddy Kallam Reviewed-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/tg3.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 4ffbe850d746..6250989c83d8 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -9276,6 +9276,15 @@ static int tg3_chip_reset(struct tg3 *tp) tg3_restore_clk(tp); + /* Increase the core clock speed to fix tx timeout issue for 5762 + * with 100Mbps link speed. + */ + if (tg3_asic_rev(tp) == ASIC_REV_5762) { + val = tr32(TG3_CPMU_CLCK_ORIDE_ENABLE); + tw32(TG3_CPMU_CLCK_ORIDE_ENABLE, val | + TG3_CPMU_MAC_ORIDE_ENABLE); + } + /* Reprobe ASF enable state. */ tg3_flag_clear(tp, ENABLE_ASF); tp->phy_flags &= ~(TG3_PHYFLG_1G_ON_VAUX_OK | From c439f620382a2cb0c9df3609e180f59d3ff2ee10 Mon Sep 17 00:00:00 2001 From: Alexander Couzens Date: Tue, 17 Jul 2018 13:17:09 +0200 Subject: [PATCH 23/29] net: usb: asix: replace mii_nway_restart in resume path [ Upstream commit 5c968f48021a9b3faa61ac2543cfab32461c0e05 ] mii_nway_restart is not pm aware which results in a rtnl deadlock. Implement mii_nway_restart manual by setting BMCR_ANRESTART if BMCR_ANENABLE is set. To reproduce: * plug an asix based usb network interface * wait until the device enters PM (~5 sec) * `ip link set eth1 up` will never return Fixes: d9fe64e51114 ("net: asix: Add in_pm parameter") Signed-off-by: Alexander Couzens Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/asix_devices.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 32e9ec8f1521..5be6b67492d5 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -640,10 +640,12 @@ static void ax88772_restore_phy(struct usbnet *dev) priv->presvd_phy_advertise); /* Restore BMCR */ + if (priv->presvd_phy_bmcr & BMCR_ANENABLE) + priv->presvd_phy_bmcr |= BMCR_ANRESTART; + asix_mdio_write_nopm(dev->net, dev->mii.phy_id, MII_BMCR, priv->presvd_phy_bmcr); - mii_nway_restart(&dev->mii); priv->presvd_phy_advertise = 0; priv->presvd_phy_bmcr = 0; } From cad99229aa2048f8b0d9dd7208577603397e65a2 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Wed, 11 Jul 2018 14:39:42 +0200 Subject: [PATCH 24/29] net: Don't copy pfmemalloc flag in __copy_skb_header() [ Upstream commit 8b7008620b8452728cadead460a36f64ed78c460 ] The pfmemalloc flag indicates that the skb was allocated from the PFMEMALLOC reserves, and the flag is currently copied on skb copy and clone. However, an skb copied from an skb flagged with pfmemalloc wasn't necessarily allocated from PFMEMALLOC reserves, and on the other hand an skb allocated that way might be copied from an skb that wasn't. So we should not copy the flag on skb copy, and rather decide whether to allow an skb to be associated with sockets unrelated to page reclaim depending only on how it was allocated. Move the pfmemalloc flag before headers_start[0] using an existing 1-bit hole, so that __copy_skb_header() doesn't copy it. When cloning, we'll now take care of this flag explicitly, contravening to the warning comment of __skb_clone(). While at it, restore the newline usage introduced by commit b19372273164 ("net: reorganize sk_buff for faster __copy_skb_header()") to visually separate bytes used in bitfields after headers_start[0], that was gone after commit a9e419dc7be6 ("netfilter: merge ctinfo into nfct pointer storage area"), and describe the pfmemalloc flag in the kernel-doc structure comment. This doesn't change the size of sk_buff or cacheline boundaries, but consolidates the 15 bits hole before tc_index into a 2 bytes hole before csum, that could now be filled more easily. Reported-by: Patrick Talbert Fixes: c93bdd0e03e8 ("netvm: allow skb allocation to use PFMEMALLOC reserves") Signed-off-by: Stefano Brivio Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 10 +++++----- net/core/skbuff.c | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1b3a2f95503d..b048d3d3b327 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -602,6 +602,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices * @xmit_more: More SKBs are pending for this queue + * @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves * @ndisc_nodetype: router type (from link layer) * @ooo_okay: allow the mapping of a socket to a queue to be changed * @l4_hash: indicate hash is a canonical 4-tuple hash over transport @@ -692,7 +693,7 @@ struct sk_buff { peeked:1, head_frag:1, xmit_more:1, - __unused:1; /* one bit hole */ + pfmemalloc:1; kmemcheck_bitfield_end(flags1); /* fields enclosed in headers_start/headers_end are copied @@ -712,19 +713,18 @@ struct sk_buff { __u8 __pkt_type_offset[0]; __u8 pkt_type:3; - __u8 pfmemalloc:1; __u8 ignore_df:1; __u8 nfctinfo:3; - __u8 nf_trace:1; + __u8 ip_summed:2; __u8 ooo_okay:1; __u8 l4_hash:1; __u8 sw_hash:1; __u8 wifi_acked_valid:1; __u8 wifi_acked:1; - __u8 no_fcs:1; + /* Indicates the inner headers are valid in the skbuff. */ __u8 encapsulation:1; __u8 encap_hdr_csum:1; @@ -732,11 +732,11 @@ struct sk_buff { __u8 csum_complete_sw:1; __u8 csum_level:2; __u8 csum_bad:1; - #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif __u8 ipvs_property:1; + __u8 inner_protocol_type:1; __u8 remcsum_offload:1; #ifdef CONFIG_NET_SWITCHDEV diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9f697b00158d..36a70b2192db 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -904,6 +904,8 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) n->cloned = 1; n->nohdr = 0; n->peeked = 0; + if (skb->pfmemalloc) + n->pfmemalloc = 1; n->destructor = NULL; C(tail); C(end); From ad375eae791f92deac62a062c176903b6215e6a2 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Fri, 13 Jul 2018 13:21:07 +0200 Subject: [PATCH 25/29] skbuff: Unconditionally copy pfmemalloc in __skb_clone() [ Upstream commit e78bfb0751d4e312699106ba7efbed2bab1a53ca ] Commit 8b7008620b84 ("net: Don't copy pfmemalloc flag in __copy_skb_header()") introduced a different handling for the pfmemalloc flag in copy and clone paths. In __skb_clone(), now, the flag is set only if it was set in the original skb, but not cleared if it wasn't. This is wrong and might lead to socket buffers being flagged with pfmemalloc even if the skb data wasn't allocated from pfmemalloc reserves. Copy the flag instead of ORing it. Reported-by: Sabrina Dubroca Fixes: 8b7008620b84 ("net: Don't copy pfmemalloc flag in __copy_skb_header()") Signed-off-by: Stefano Brivio Tested-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 36a70b2192db..8cae7aa4a4ec 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -904,8 +904,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) n->cloned = 1; n->nohdr = 0; n->peeked = 0; - if (skb->pfmemalloc) - n->pfmemalloc = 1; + C(pfmemalloc); n->destructor = NULL; C(tail); C(end); From 33b2110bd92aa830300f9a7209b720fc93f0f713 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 21 Jun 2018 16:19:41 +0300 Subject: [PATCH 26/29] xhci: Fix perceived dead host due to runtime suspend race with event handler commit 229bc19fd7aca4f37964af06e3583c1c8f36b5d6 upstream. Don't rely on event interrupt (EINT) bit alone to detect pending port change in resume. If no change event is detected the host may be suspended again, oterwise roothubs are resumed. There is a lag in xHC setting EINT. If we don't notice the pending change in resume, and the controller is runtime suspeded again, it causes the event handler to assume host is dead as it will fail to read xHC registers once PCI puts the controller to D3 state. [ 268.520969] xhci_hcd: xhci_resume: starting port polling. [ 268.520985] xhci_hcd: xhci_hub_status_data: stopping port polling. [ 268.521030] xhci_hcd: xhci_suspend: stopping port polling. [ 268.521040] xhci_hcd: // Setting command ring address to 0x349bd001 [ 268.521139] xhci_hcd: Port Status Change Event for port 3 [ 268.521149] xhci_hcd: resume root hub [ 268.521163] xhci_hcd: port resume event for port 3 [ 268.521168] xhci_hcd: xHC is not running. [ 268.521174] xhci_hcd: handle_port_status: starting port polling. [ 268.596322] xhci_hcd: xhci_hc_died: xHCI host controller not responding, assume dead The EINT lag is described in a additional note in xhci specs 4.19.2: "Due to internal xHC scheduling and system delays, there will be a lag between a change bit being set and the Port Status Change Event that it generated being written to the Event Ring. If SW reads the PORTSC and sees a change bit set, there is no guarantee that the corresponding Port Status Change Event has already been written into the Event Ring." Cc: Signed-off-by: Mathias Nyman Signed-off-by: Kai-Heng Feng Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 40 +++++++++++++++++++++++++++++++++++++--- drivers/usb/host/xhci.h | 4 ++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index a7d239f5fc5f..81f25213cb41 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -891,6 +891,41 @@ static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci) spin_unlock_irqrestore(&xhci->lock, flags); } +static bool xhci_pending_portevent(struct xhci_hcd *xhci) +{ + __le32 __iomem **port_array; + int port_index; + u32 status; + u32 portsc; + + status = readl(&xhci->op_regs->status); + if (status & STS_EINT) + return true; + /* + * Checking STS_EINT is not enough as there is a lag between a change + * bit being set and the Port Status Change Event that it generated + * being written to the Event Ring. See note in xhci 1.1 section 4.19.2. + */ + + port_index = xhci->num_usb2_ports; + port_array = xhci->usb2_ports; + while (port_index--) { + portsc = readl(port_array[port_index]); + if (portsc & PORT_CHANGE_MASK || + (portsc & PORT_PLS_MASK) == XDEV_RESUME) + return true; + } + port_index = xhci->num_usb3_ports; + port_array = xhci->usb3_ports; + while (port_index--) { + portsc = readl(port_array[port_index]); + if (portsc & PORT_CHANGE_MASK || + (portsc & PORT_PLS_MASK) == XDEV_RESUME) + return true; + } + return false; +} + /* * Stop HC (not bus-specific) * @@ -987,7 +1022,7 @@ EXPORT_SYMBOL_GPL(xhci_suspend); */ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) { - u32 command, temp = 0, status; + u32 command, temp = 0; struct usb_hcd *hcd = xhci_to_hcd(xhci); struct usb_hcd *secondary_hcd; int retval = 0; @@ -1109,8 +1144,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) done: if (retval == 0) { /* Resume root hubs only when have pending events. */ - status = readl(&xhci->op_regs->status); - if (status & STS_EINT) { + if (xhci_pending_portevent(xhci)) { usb_hcd_resume_root_hub(xhci->shared_hcd); usb_hcd_resume_root_hub(hcd); } diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 836398ade58d..b9181281aa9e 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -385,6 +385,10 @@ struct xhci_op_regs { #define PORT_PLC (1 << 22) /* port configure error change - port failed to configure its link partner */ #define PORT_CEC (1 << 23) +#define PORT_CHANGE_MASK (PORT_CSC | PORT_PEC | PORT_WRC | PORT_OCC | \ + PORT_RC | PORT_PLC | PORT_CEC) + + /* Cold Attach Status - xHC can set this bit to report device attached during * Sx state. Warm port reset should be perfomed to clear this bit and move port * to connected state. From 2ea8b93c0310e8092daaa422376f08bf3b321de1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:35:46 -0400 Subject: [PATCH 27/29] xprtrdma: Return -ENOBUFS when no pages are available commit a8f688ec437dc2045cc8f0c89fe877d5803850da upstream. The use of -EAGAIN in rpcrdma_convert_iovs() is a latent bug: the transport never calls xprt_write_space() when more pages become available. -ENOBUFS will trigger the correct "delay briefly and call again" logic. Fixes: 7a89f9c626e3 ("xprtrdma: Honor ->send_request API contract") Signed-off-by: Chuck Lever Cc: stable@vger.kernel.org # 4.8+ Signed-off-by: Anna Schumaker Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtrdma/rpc_rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index f57c9f0ab8f9..0287734f126f 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -229,7 +229,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf, /* alloc the pagelist for receiving buffer */ ppages[p] = alloc_page(GFP_ATOMIC); if (!ppages[p]) - return -EAGAIN; + return -ENOBUFS; } seg[n].mr_page = ppages[p]; seg[n].mr_offset = (void *)(unsigned long) page_base; From 3118ceb456200d73179ddf29f2b95f59e35465f9 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Thu, 12 Apr 2018 19:11:58 +0100 Subject: [PATCH 28/29] block: do not use interruptible wait anywhere commit 1dc3039bc87ae7d19a990c3ee71cfd8a9068f428 upstream. When blk_queue_enter() waits for a queue to unfreeze, or unset the PREEMPT_ONLY flag, do not allow it to be interrupted by a signal. The PREEMPT_ONLY flag was introduced later in commit 3a0a529971ec ("block, scsi: Make SCSI quiesce and resume work reliably"). Note the SCSI device is resumed asynchronously, i.e. after un-freezing userspace tasks. So that commit exposed the bug as a regression in v4.15. A mysterious SIGBUS (or -EIO) sometimes happened during the time the device was being resumed. Most frequently, there was no kernel log message, and we saw Xorg or Xwayland killed by SIGBUS.[1] [1] E.g. https://bugzilla.redhat.com/show_bug.cgi?id=1553979 Without this fix, I get an IO error in this test: # dd if=/dev/sda of=/dev/null iflag=direct & \ while killall -SIGUSR1 dd; do sleep 0.1; done & \ echo mem > /sys/power/state ; \ sleep 5; killall dd # stop after 5 seconds The interruptible wait was added to blk_queue_enter in commit 3ef28e83ab15 ("block: generic request_queue reference counting"). Before then, the interruptible wait was only in blk-mq, but I don't think it could ever have been correct. Reviewed-by: Bart Van Assche Cc: stable@vger.kernel.org Signed-off-by: Alan Jenkins Signed-off-by: Jens Axboe Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- block/blk-core.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 23daf40be371..77b99bf16c83 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -652,7 +652,6 @@ EXPORT_SYMBOL(blk_alloc_queue); int blk_queue_enter(struct request_queue *q, bool nowait) { while (true) { - int ret; if (percpu_ref_tryget_live(&q->q_usage_counter)) return 0; @@ -660,13 +659,11 @@ int blk_queue_enter(struct request_queue *q, bool nowait) if (nowait) return -EBUSY; - ret = wait_event_interruptible(q->mq_freeze_wq, - !atomic_read(&q->mq_freeze_depth) || - blk_queue_dying(q)); + wait_event(q->mq_freeze_wq, + !atomic_read(&q->mq_freeze_depth) || + blk_queue_dying(q)); if (blk_queue_dying(q)) return -ENODEV; - if (ret) - return ret; } } From dbcdf42bab53d219caa51bcfe54c8b9066010290 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 25 Jul 2018 11:24:03 +0200 Subject: [PATCH 29/29] Linux 4.9.115 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f4cd42c9b940..889c58e39928 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 114 +SUBLEVEL = 115 EXTRAVERSION = NAME = Roaring Lionus