From dd0b0e22a58875e0ea9f41fc177f2286dddc95ab Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 20 Oct 2016 16:48:12 -0700 Subject: [PATCH 01/25] drm/vc4: Fix termination of the initial scan for branch targets. commit 457e67a728696c4f8e6423c64e93def50530db9a upstream. The loop is scanning until the original max_ip (size of the BO), but we want to not examine any code after the PROG_END's delay slots. There was a block trying to do that, except that we had some early continue statements if the signal wasn't a PROG_END or a BRANCH. The failure mode would be that a valid shader is rejected because some undefined memory after the PROG_END slots is parsed as a branch and the rest of its setup is illegal. I haven't seen this in the wild, but valgrind was complaining when about this up in the userland simulator mode. Signed-off-by: Eric Anholt Cc: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vc4/vc4_validate_shaders.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c index 2543cf5b8b51..917321ce832f 100644 --- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c +++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c @@ -608,9 +608,7 @@ static bool vc4_validate_branches(struct vc4_shader_validation_state *validation_state) { uint32_t max_branch_target = 0; - bool found_shader_end = false; int ip; - int shader_end_ip = 0; int last_branch = -2; for (ip = 0; ip < validation_state->max_ip; ip++) { @@ -621,8 +619,13 @@ vc4_validate_branches(struct vc4_shader_validation_state *validation_state) uint32_t branch_target_ip; if (sig == QPU_SIG_PROG_END) { - shader_end_ip = ip; - found_shader_end = true; + /* There are two delay slots after program end is + * signaled that are still executed, then we're + * finished. validation_state->max_ip is the + * instruction after the last valid instruction in the + * program. + */ + validation_state->max_ip = ip + 3; continue; } @@ -676,15 +679,9 @@ vc4_validate_branches(struct vc4_shader_validation_state *validation_state) } set_bit(after_delay_ip, validation_state->branch_targets); max_branch_target = max(max_branch_target, after_delay_ip); - - /* There are two delay slots after program end is signaled - * that are still executed, then we're finished. - */ - if (found_shader_end && ip == shader_end_ip + 2) - break; } - if (max_branch_target > shader_end_ip) { + if (max_branch_target > validation_state->max_ip - 3) { DRM_ERROR("Branch landed after QPU_SIG_PROG_END"); return false; } From 2bbcbc24474e3e0726bdff893b8a586da44d9b12 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 4 Nov 2016 15:58:38 -0700 Subject: [PATCH 02/25] drm/vc4: Use runtime autosuspend to avoid thrashing V3D power state. commit 3a62234680d86efa0239665ed8a0e908f1aef147 upstream. The pm_runtime_put() we were using immediately released power on the device, which meant that we were generally turning the device off and on once per frame. In many profiles I've looked at, that added up to about 1% of CPU time, but this could get worse in the case of frequent rendering and readback (as may happen in X rendering). By keeping the device on until we've been idle for a couple of frames, we drop the overhead of runtime PM down to sub-.1%. Signed-off-by: Eric Anholt Cc: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vc4/vc4_drv.c | 9 ++++++--- drivers/gpu/drm/vc4/vc4_gem.c | 6 ++++-- drivers/gpu/drm/vc4/vc4_v3d.c | 2 ++ 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 8703f56b7947..246d1aea87bc 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -61,21 +61,24 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data, if (ret < 0) return ret; args->value = V3D_READ(V3D_IDENT0); - pm_runtime_put(&vc4->v3d->pdev->dev); + pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev); + pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev); break; case DRM_VC4_PARAM_V3D_IDENT1: ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); if (ret < 0) return ret; args->value = V3D_READ(V3D_IDENT1); - pm_runtime_put(&vc4->v3d->pdev->dev); + pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev); + pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev); break; case DRM_VC4_PARAM_V3D_IDENT2: ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); if (ret < 0) return ret; args->value = V3D_READ(V3D_IDENT2); - pm_runtime_put(&vc4->v3d->pdev->dev); + pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev); + pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev); break; case DRM_VC4_PARAM_SUPPORTS_BRANCHES: args->value = true; diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 18e37171e9c8..ab3016982466 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -711,8 +711,10 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) } mutex_lock(&vc4->power_lock); - if (--vc4->power_refcount == 0) - pm_runtime_put(&vc4->v3d->pdev->dev); + if (--vc4->power_refcount == 0) { + pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev); + pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev); + } mutex_unlock(&vc4->power_lock); kfree(exec); diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index e6d3c6028341..7cc346ad9b0b 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -222,6 +222,8 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data) return ret; } + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, 40); /* a little over 2 frames. */ pm_runtime_enable(dev); return 0; From 85f687708c811563df33c70c9cec9b5c2733acf8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 2 Mar 2017 12:17:22 -0800 Subject: [PATCH 03/25] give up on gcc ilog2() constant optimizations commit 474c90156c8dcc2fa815e6716cc9394d7930cb9c upstream. gcc-7 has an "optimization" pass that completely screws up, and generates the code expansion for the (impossible) case of calling ilog2() with a zero constant, even when the code gcc compiles does not actually have a zero constant. And we try to generate a compile-time error for anybody doing ilog2() on a constant where that doesn't make sense (be it zero or negative). So now gcc7 will fail the build due to our sanity checking, because it created that constant-zero case that didn't actually exist in the source code. There's a whole long discussion on the kernel mailing about how to work around this gcc bug. The gcc people themselevs have discussed their "feature" in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72785 but it's all water under the bridge, because while it looked at one point like it would be solved by the time gcc7 was released, that was not to be. So now we have to deal with this compiler braindamage. And the only simple approach seems to be to just delete the code that tries to warn about bad uses of ilog2(). So now "ilog2()" will just return 0 not just for the value 1, but for any non-positive value too. It's not like I can recall anybody having ever actually tried to use this function on any invalid value, but maybe the sanity check just meant that such code never made it out in public. Reported-by: Laura Abbott Cc: John Stultz , Cc: Thomas Gleixner Cc: Ard Biesheuvel Signed-off-by: Linus Torvalds Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- include/linux/log2.h | 13 ++----------- tools/include/linux/log2.h | 13 ++----------- 2 files changed, 4 insertions(+), 22 deletions(-) diff --git a/include/linux/log2.h b/include/linux/log2.h index fd7ff3d91e6a..f38fae23bdac 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -15,12 +15,6 @@ #include #include -/* - * deal with unrepresentable constant logarithms - */ -extern __attribute__((const, noreturn)) -int ____ilog2_NaN(void); - /* * non-constant log of base 2 calculators * - the arch may override these in asm/bitops.h if they can be implemented @@ -85,7 +79,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) #define ilog2(n) \ ( \ __builtin_constant_p(n) ? ( \ - (n) < 1 ? ____ilog2_NaN() : \ + (n) < 2 ? 0 : \ (n) & (1ULL << 63) ? 63 : \ (n) & (1ULL << 62) ? 62 : \ (n) & (1ULL << 61) ? 61 : \ @@ -148,10 +142,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) (n) & (1ULL << 4) ? 4 : \ (n) & (1ULL << 3) ? 3 : \ (n) & (1ULL << 2) ? 2 : \ - (n) & (1ULL << 1) ? 1 : \ - (n) & (1ULL << 0) ? 0 : \ - ____ilog2_NaN() \ - ) : \ + 1 ) : \ (sizeof(n) <= 4) ? \ __ilog2_u32(n) : \ __ilog2_u64(n) \ diff --git a/tools/include/linux/log2.h b/tools/include/linux/log2.h index 41446668ccce..d5677d39c1e4 100644 --- a/tools/include/linux/log2.h +++ b/tools/include/linux/log2.h @@ -12,12 +12,6 @@ #ifndef _TOOLS_LINUX_LOG2_H #define _TOOLS_LINUX_LOG2_H -/* - * deal with unrepresentable constant logarithms - */ -extern __attribute__((const, noreturn)) -int ____ilog2_NaN(void); - /* * non-constant log of base 2 calculators * - the arch may override these in asm/bitops.h if they can be implemented @@ -78,7 +72,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) #define ilog2(n) \ ( \ __builtin_constant_p(n) ? ( \ - (n) < 1 ? ____ilog2_NaN() : \ + (n) < 2 ? 0 : \ (n) & (1ULL << 63) ? 63 : \ (n) & (1ULL << 62) ? 62 : \ (n) & (1ULL << 61) ? 61 : \ @@ -141,10 +135,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) (n) & (1ULL << 4) ? 4 : \ (n) & (1ULL << 3) ? 3 : \ (n) & (1ULL << 2) ? 2 : \ - (n) & (1ULL << 1) ? 1 : \ - (n) & (1ULL << 0) ? 0 : \ - ____ilog2_NaN() \ - ) : \ + 1 ) : \ (sizeof(n) <= 4) ? \ __ilog2_u32(n) : \ __ilog2_u64(n) \ From c5ad350d61441ea18869f910e60ae1b58a57d850 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:44 -0700 Subject: [PATCH 04/25] qla2xxx: Fix memory leak for abts processing commit ae940f2c472a62904dc18234de5cf3ed28f195ee upstream. Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_target.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index bff9689f5ca9..c6c3b97a1e93 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -6794,6 +6794,8 @@ qlt_handle_abts_recv_work(struct work_struct *work) spin_lock_irqsave(&ha->hardware_lock, flags); qlt_response_pkt_all_vps(vha, (response_t *)&op->atio); spin_unlock_irqrestore(&ha->hardware_lock, flags); + + kfree(op); } void From 66e70bdca599abd2dc42c5849ae21895de141bfe Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:45 -0700 Subject: [PATCH 05/25] qla2xxx: Fix request queue corruption. commit 8b666809e10cda9814af3e8be339d35b83909056 upstream. When FW notify driver or driver detects low FW resource, driver tries to send out Busy SCSI Status to tell Initiator side to back off. During the send process, the lock was not held. Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_target.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index c6c3b97a1e93..feab7ea8e823 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -5375,16 +5375,22 @@ qlt_send_busy(struct scsi_qla_host *vha, static int qlt_chk_qfull_thresh_hold(struct scsi_qla_host *vha, - struct atio_from_isp *atio) + struct atio_from_isp *atio, bool ha_locked) { struct qla_hw_data *ha = vha->hw; uint16_t status; + unsigned long flags; if (ha->tgt.num_pend_cmds < Q_FULL_THRESH_HOLD(ha)) return 0; + if (!ha_locked) + spin_lock_irqsave(&ha->hardware_lock, flags); status = temp_sam_status; qlt_send_busy(vha, atio, status); + if (!ha_locked) + spin_unlock_irqrestore(&ha->hardware_lock, flags); + return 1; } @@ -5429,7 +5435,7 @@ static void qlt_24xx_atio_pkt(struct scsi_qla_host *vha, if (likely(atio->u.isp24.fcp_cmnd.task_mgmt_flags == 0)) { - rc = qlt_chk_qfull_thresh_hold(vha, atio); + rc = qlt_chk_qfull_thresh_hold(vha, atio, ha_locked); if (rc != 0) { tgt->atio_irq_cmd_count--; return; @@ -5552,7 +5558,7 @@ static void qlt_response_pkt(struct scsi_qla_host *vha, response_t *pkt) break; } - rc = qlt_chk_qfull_thresh_hold(vha, atio); + rc = qlt_chk_qfull_thresh_hold(vha, atio, true); if (rc != 0) { tgt->irq_cmd_count--; return; From a690a42ae7b90ceb761816fb33ee39058962d58e Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sat, 11 Mar 2017 18:03:34 -0500 Subject: [PATCH 06/25] parisc: Optimize flush_kernel_vmap_range and invalidate_kernel_vmap_range commit 316ec0624f951166daedbe446988ef92ae72b59f upstream. The previously submitted patch did not resolve the random segmentation faults observed on the phantom buildd system. There are still unresolved problems with the Debian 4.8 and 4.9 kernels on C8000. The attached patch removes the flush of the offset map pages and does a whole data cache flush for large ranges. No other arch flushes the offset map in these routines as far as I can tell. I have not observed any random segmentation faults on rp3440 in two weeks of testing with 4.10.0 and 4.10.1. Signed-off-by: John David Anglin Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/cacheflush.h | 23 ++--------------------- arch/parisc/kernel/cache.c | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index 7bd69bd43a01..1d8c24dc04d4 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -45,28 +45,9 @@ static inline void flush_kernel_dcache_page(struct page *page) #define flush_kernel_dcache_range(start,size) \ flush_kernel_dcache_range_asm((start), (start)+(size)); -/* vmap range flushes and invalidates. Architecturally, we don't need - * the invalidate, because the CPU should refuse to speculate once an - * area has been flushed, so invalidate is left empty */ -static inline void flush_kernel_vmap_range(void *vaddr, int size) -{ - unsigned long start = (unsigned long)vaddr; - flush_kernel_dcache_range_asm(start, start + size); -} -static inline void invalidate_kernel_vmap_range(void *vaddr, int size) -{ - unsigned long start = (unsigned long)vaddr; - void *cursor = vaddr; - - for ( ; cursor < vaddr + size; cursor += PAGE_SIZE) { - struct page *page = vmalloc_to_page(cursor); - - if (test_and_clear_bit(PG_dcache_dirty, &page->flags)) - flush_kernel_dcache_page(page); - } - flush_kernel_dcache_range_asm(start, start + size); -} +void flush_kernel_vmap_range(void *vaddr, int size); +void invalidate_kernel_vmap_range(void *vaddr, int size); #define flush_cache_vmap(start, end) flush_cache_all() #define flush_cache_vunmap(start, end) flush_cache_all() diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 977f0a4f5ecf..53ec75f8e237 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -633,3 +633,25 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); } } + +void flush_kernel_vmap_range(void *vaddr, int size) +{ + unsigned long start = (unsigned long)vaddr; + + if ((unsigned long)size > parisc_cache_flush_threshold) + flush_data_cache(); + else + flush_kernel_dcache_range_asm(start, start + size); +} +EXPORT_SYMBOL(flush_kernel_vmap_range); + +void invalidate_kernel_vmap_range(void *vaddr, int size) +{ + unsigned long start = (unsigned long)vaddr; + + if ((unsigned long)size > parisc_cache_flush_threshold) + flush_data_cache(); + else + flush_kernel_dcache_range_asm(start, start + size); +} +EXPORT_SYMBOL(invalidate_kernel_vmap_range); From 13695ce5b1024b38cfecbd334ec5ddcaac1b6807 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 18 Mar 2017 17:13:27 +0100 Subject: [PATCH 07/25] parisc: Fix system shutdown halt commit 73580dac7618e4bcd21679f553cf3c97323fec46 upstream. On those parisc machines which don't provide a software power off function, the system currently kills the init process at the end of a shutdown and unexpectedly restarts insteads of halting. Fix it by adding a loop which will not return. Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/process.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 40639439d8b3..e81afc378850 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -139,6 +139,8 @@ void machine_power_off(void) printk(KERN_EMERG "System shut down completed.\n" "Please power this system off now."); + + for (;;); } void (*pm_power_off)(void) = machine_power_off; From c04a938229e79ff52c4b9f027b89f544aeb3234a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Mar 2017 13:47:48 +0100 Subject: [PATCH 08/25] perf/core: Fix use-after-free in perf_release() commit e552a8389aa409e257b7dcba74f67f128f979ccc upstream. Dmitry reported syzcaller tripped a use-after-free in perf_release(). After much puzzlement Oleg spotted the below scenario: Task1 Task2 fork() perf_event_init_task() /* ... */ goto bad_fork_$foo; /* ... */ perf_event_free_task() mutex_lock(ctx->lock) perf_free_event(B) perf_event_release_kernel(A) mutex_lock(A->child_mutex) list_for_each_entry(child, ...) { /* child == B */ ctx = B->ctx; get_ctx(ctx); mutex_unlock(A->child_mutex); mutex_lock(A->child_mutex) list_del_init(B->child_list) mutex_unlock(A->child_mutex) /* ... */ mutex_unlock(ctx->lock); put_ctx() /* >0 */ free_task(); mutex_lock(ctx->lock); mutex_lock(A->child_mutex); /* ... */ mutex_unlock(A->child_mutex); mutex_unlock(ctx->lock) put_ctx() /* 0 */ ctx->task && !TOMBSTONE put_task_struct() /* UAF */ This patch closes the hole by making perf_event_free_task() destroy the task <-> ctx relation such that perf_event_release_kernel() will no longer observe the now dead task. Spotted-by: Oleg Nesterov Reported-by: Dmitry Vyukov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: fweisbec@gmail.com Cc: oleg@redhat.com Fixes: c6e5b73242d2 ("perf: Synchronously clean up child events") Link: http://lkml.kernel.org/r/20170314155949.GE32474@worktop Link: http://lkml.kernel.org/r/20170316125823.140295131@infradead.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 4b3323151a2f..ef05f8c15f95 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10333,6 +10333,17 @@ void perf_event_free_task(struct task_struct *task) continue; mutex_lock(&ctx->mutex); + raw_spin_lock_irq(&ctx->lock); + /* + * Destroy the task <-> ctx relation and mark the context dead. + * + * This is important because even though the task hasn't been + * exposed yet the context has been (through child_list). + */ + RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], NULL); + WRITE_ONCE(ctx->task, TASK_TOMBSTONE); + put_task_struct(task); /* cannot be last */ + raw_spin_unlock_irq(&ctx->lock); again: list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) From 69efd8e21246b56125e94dc4292d02423c14b49e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Mar 2017 13:47:49 +0100 Subject: [PATCH 09/25] perf/core: Fix event inheritance on fork() commit e7cc4865f0f31698ef2f7aac01a50e78968985b7 upstream. While hunting for clues to a use-after-free, Oleg spotted that perf_event_init_context() can loose an error value with the result that fork() can succeed even though we did not fully inherit the perf event context. Spotted-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Dmitry Vyukov Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: oleg@redhat.com Fixes: 889ff0150661 ("perf/core: Split context's event group list into pinned and non-pinned lists") Link: http://lkml.kernel.org/r/20170316125823.190342547@infradead.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index ef05f8c15f95..07c0dc806dfc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10597,7 +10597,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn) ret = inherit_task_group(event, parent, parent_ctx, child, ctxn, &inherited_all); if (ret) - break; + goto out_unlock; } /* @@ -10613,7 +10613,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn) ret = inherit_task_group(event, parent, parent_ctx, child, ctxn, &inherited_all); if (ret) - break; + goto out_unlock; } raw_spin_lock_irqsave(&parent_ctx->lock, flags); @@ -10641,6 +10641,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn) } raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); +out_unlock: mutex_unlock(&parent_ctx->mutex); perf_unpin_context(parent_ctx); From 87144ec25091a516c1674494bd95f8966a480eec Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 11 Mar 2017 15:52:47 -0500 Subject: [PATCH 10/25] xprtrdma: Squelch kbuild sparse complaint commit eed50879d64ab1b9f76445dbab822e43a098b309 upstream. New complaint from kbuild for 4.9.y: net/sunrpc/xprtrdma/verbs.c:489:19: sparse: incompatible types in comparison expression (different type sizes) verbs.c: 489 max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES); I can't reproduce this running sparse here. Likewise, "make W=1 net/sunrpc/xprtrdma/verbs.o" never indicated any issue. A little poking suggests that because the range of its values is small, gcc can make the actual width of RPCRDMA_MAX_SEND_SGES smaller than the width of an unsigned integer. Fixes: 16f906d66cd7 ("xprtrdma: Reduce required number of send SGEs") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtrdma/verbs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index e2c37061edbe..69502fa68a3c 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -486,7 +486,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, struct ib_cq *sendcq, *recvcq; int rc; - max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES); + max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge, + RPCRDMA_MAX_SEND_SGES); if (max_sge < RPCRDMA_MIN_SEND_SGES) { pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge); return -ENOMEM; From b76d4fb2d9445e29f003308607c65b882b117b26 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 13 Mar 2017 10:36:19 -0400 Subject: [PATCH 11/25] NFS prevent double free in async nfs4_exchange_id commit 63513232f8cd219dcaa5eafae028740ed3067d83 upstream. Since rpc_task is async, the release function should be called which will free the impl_id, scope, and owner. Trond pointed at 2 more problems: -- use of client pointer after free in the nfs4_exchangeid_release() function -- cl_count mismatch if rpc_run_task() isn't run Fixes: 8d89bd70bc9 ("NFS setup async exchange_id") Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 609840de31d3..1536aeb0abab 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7426,11 +7426,11 @@ static void nfs4_exchange_id_release(void *data) struct nfs41_exchange_id_data *cdata = (struct nfs41_exchange_id_data *)data; - nfs_put_client(cdata->args.client); if (cdata->xprt) { xprt_put(cdata->xprt); rpc_clnt_xprt_switch_put(cdata->args.client->cl_rpcclient); } + nfs_put_client(cdata->args.client); kfree(cdata->res.impl_id); kfree(cdata->res.server_scope); kfree(cdata->res.server_owner); @@ -7537,10 +7537,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, task_setup_data.callback_data = calldata; task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) { - status = PTR_ERR(task); - goto out_impl_id; - } + if (IS_ERR(task)) + return PTR_ERR(task); if (!xprt) { status = rpc_wait_for_completion_task(task); @@ -7568,6 +7566,7 @@ out_server_owner: kfree(calldata->res.server_owner); out_calldata: kfree(calldata); + nfs_put_client(clp); goto out; } From bb8c61ad784d71620e234ac02f8550f405cb9d33 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 15 Mar 2017 00:12:16 +0100 Subject: [PATCH 12/25] cpufreq: Fix and clean up show_cpuinfo_cur_freq() commit 9b4f603e7a9f4282aec451063ffbbb8bb410dcd9 upstream. There is a missing newline in show_cpuinfo_cur_freq(), so add it, but while at it clean that function up somewhat too. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 6e6c1fb60fbc..272608f102fb 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -680,9 +680,11 @@ static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy, char *buf) { unsigned int cur_freq = __cpufreq_get(policy); - if (!cur_freq) - return sprintf(buf, ""); - return sprintf(buf, "%u\n", cur_freq); + + if (cur_freq) + return sprintf(buf, "%u\n", cur_freq); + + return sprintf(buf, "\n"); } /** From 4265e0b487da6427105e8238679a237704557945 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 7 Mar 2017 16:14:49 +1100 Subject: [PATCH 13/25] powerpc/boot: Fix zImage TOC alignment commit 97ee351b50a49717543533cfb85b4bf9d88c9680 upstream. Recent toolchains force the TOC to be 256 byte aligned. We need to enforce this alignment in the zImage linker script, otherwise pointers to our TOC variables (__toc_start) could be incorrect. If the actual start of the TOC and __toc_start don't have the same value we crash early in the zImage wrapper. Suggested-by: Alan Modra Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/boot/zImage.lds.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S index 861e72109df2..f080abfc2f83 100644 --- a/arch/powerpc/boot/zImage.lds.S +++ b/arch/powerpc/boot/zImage.lds.S @@ -68,6 +68,7 @@ SECTIONS } #ifdef CONFIG_PPC64_BOOT_WRAPPER + . = ALIGN(256); .got : { __toc_start = .; From ad5166415ff3178cc75331dc6366ea8f4e48207d Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 28 Feb 2017 13:00:20 -0800 Subject: [PATCH 14/25] md/raid1/10: fix potential deadlock commit 61eb2b43b99ebdc9bc6bc83d9792257b243e7cb3 upstream. Neil Brown pointed out a potential deadlock in raid 10 code with bio_split/chain. The raid1 code could have the same issue, but recent barrier rework makes it less likely to happen. The deadlock happens in below sequence: 1. generic_make_request(bio), this will set current->bio_list 2. raid10_make_request will split bio to bio1 and bio2 3. __make_request(bio1), wait_barrer, add underlayer disk bio to current->bio_list 4. __make_request(bio2), wait_barrer If raise_barrier happens between 3 & 4, since wait_barrier runs at 3, raise_barrier waits for IO completion from 3. And since raise_barrier sets barrier, 4 waits for raise_barrier. But IO from 3 can't be dispatched because raid10_make_request() doesn't finished yet. The solution is to adjust the IO ordering. Quotes from Neil: " It is much safer to: if (need to split) { split = bio_split(bio, ...) bio_chain(...) make_request_fn(split); generic_make_request(bio); } else make_request_fn(mddev, bio); This way we first process the initial section of the bio (in 'split') which will queue some requests to the underlying devices. These requests will be queued in generic_make_request. Then we queue the remainder of the bio, which will be added to the end of the generic_make_request queue. Then we return. generic_make_request() will pop the lower-level device requests off the queue and handle them first. Then it will process the remainder of the original bio once the first section has been fully processed. " Note, this only happens in read path. In write path, the bio is flushed to underlaying disks either by blk flush (from schedule) or offladed to raid1/10d. It's queued in current->bio_list. Cc: Coly Li Suggested-by: NeilBrown Reviewed-by: Jack Wang Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 39fddda2fef2..55b5e0e77b17 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1470,7 +1470,25 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio) split = bio; } + /* + * If a bio is splitted, the first part of bio will pass + * barrier but the bio is queued in current->bio_list (see + * generic_make_request). If there is a raise_barrier() called + * here, the second part of bio can't pass barrier. But since + * the first part bio isn't dispatched to underlaying disks + * yet, the barrier is never released, hence raise_barrier will + * alays wait. We have a deadlock. + * Note, this only happens in read path. For write path, the + * first part of bio is dispatched in a schedule() call + * (because of blk plug) or offloaded to raid10d. + * Quitting from the function immediately can change the bio + * order queued in bio_list and avoid the deadlock. + */ __make_request(mddev, split); + if (split != bio && bio_data_dir(bio) == READ) { + generic_make_request(bio); + break; + } } while (split != bio); /* In case raid10d snuck in to freeze_array */ From f8e24eab9bcd8aded3e5940cf5baaa4512645ecc Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 3 Nov 2016 23:06:53 -0700 Subject: [PATCH 15/25] target/pscsi: Fix TYPE_TAPE + TYPE_MEDIMUM_CHANGER export commit a04e54f2c35823ca32d56afcd5cea5b783e2f51a upstream. The following fixes a divide by zero OOPs with TYPE_TAPE due to pscsi_tape_read_blocksize() failing causing a zero sd->sector_size being propigated up via dev_attrib.hw_block_size. It also fixes another long-standing bug where TYPE_TAPE and TYPE_MEDIMUM_CHANGER where using pscsi_create_type_other(), which does not call scsi_device_get() to take the device reference. Instead, rename pscsi_create_type_rom() to pscsi_create_type_nondisk() and use it for all cases. Finally, also drop a dump_stack() in pscsi_get_blocks() for non TYPE_DISK, which in modern target-core can get invoked via target_sense_desc_format() during CHECK_CONDITION. Reported-by: Malcolm Haak Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_pscsi.c | 47 ++++++++---------------------- 1 file changed, 12 insertions(+), 35 deletions(-) diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 9125d9358dea..ef1c8c158f66 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -154,7 +154,7 @@ static void pscsi_tape_read_blocksize(struct se_device *dev, buf = kzalloc(12, GFP_KERNEL); if (!buf) - return; + goto out_free; memset(cdb, 0, MAX_COMMAND_SIZE); cdb[0] = MODE_SENSE; @@ -169,9 +169,10 @@ static void pscsi_tape_read_blocksize(struct se_device *dev, * If MODE_SENSE still returns zero, set the default value to 1024. */ sdev->sector_size = (buf[9] << 16) | (buf[10] << 8) | (buf[11]); +out_free: if (!sdev->sector_size) sdev->sector_size = 1024; -out_free: + kfree(buf); } @@ -314,9 +315,10 @@ static int pscsi_add_device_to_list(struct se_device *dev, sd->lun, sd->queue_depth); } - dev->dev_attrib.hw_block_size = sd->sector_size; + dev->dev_attrib.hw_block_size = + min_not_zero((int)sd->sector_size, 512); dev->dev_attrib.hw_max_sectors = - min_t(int, sd->host->max_sectors, queue_max_hw_sectors(q)); + min_not_zero(sd->host->max_sectors, queue_max_hw_sectors(q)); dev->dev_attrib.hw_queue_depth = sd->queue_depth; /* @@ -339,8 +341,10 @@ static int pscsi_add_device_to_list(struct se_device *dev, /* * For TYPE_TAPE, attempt to determine blocksize with MODE_SENSE. */ - if (sd->type == TYPE_TAPE) + if (sd->type == TYPE_TAPE) { pscsi_tape_read_blocksize(dev, sd); + dev->dev_attrib.hw_block_size = sd->sector_size; + } return 0; } @@ -406,7 +410,7 @@ static int pscsi_create_type_disk(struct se_device *dev, struct scsi_device *sd) /* * Called with struct Scsi_Host->host_lock called. */ -static int pscsi_create_type_rom(struct se_device *dev, struct scsi_device *sd) +static int pscsi_create_type_nondisk(struct se_device *dev, struct scsi_device *sd) __releases(sh->host_lock) { struct pscsi_hba_virt *phv = dev->se_hba->hba_ptr; @@ -433,28 +437,6 @@ static int pscsi_create_type_rom(struct se_device *dev, struct scsi_device *sd) return 0; } -/* - * Called with struct Scsi_Host->host_lock called. - */ -static int pscsi_create_type_other(struct se_device *dev, - struct scsi_device *sd) - __releases(sh->host_lock) -{ - struct pscsi_hba_virt *phv = dev->se_hba->hba_ptr; - struct Scsi_Host *sh = sd->host; - int ret; - - spin_unlock_irq(sh->host_lock); - ret = pscsi_add_device_to_list(dev, sd); - if (ret) - return ret; - - pr_debug("CORE_PSCSI[%d] - Added Type: %s for %d:%d:%d:%llu\n", - phv->phv_host_id, scsi_device_type(sd->type), sh->host_no, - sd->channel, sd->id, sd->lun); - return 0; -} - static int pscsi_configure_device(struct se_device *dev) { struct se_hba *hba = dev->se_hba; @@ -542,11 +524,8 @@ static int pscsi_configure_device(struct se_device *dev) case TYPE_DISK: ret = pscsi_create_type_disk(dev, sd); break; - case TYPE_ROM: - ret = pscsi_create_type_rom(dev, sd); - break; default: - ret = pscsi_create_type_other(dev, sd); + ret = pscsi_create_type_nondisk(dev, sd); break; } @@ -611,8 +590,7 @@ static void pscsi_free_device(struct se_device *dev) else if (pdv->pdv_lld_host) scsi_host_put(pdv->pdv_lld_host); - if ((sd->type == TYPE_DISK) || (sd->type == TYPE_ROM)) - scsi_device_put(sd); + scsi_device_put(sd); pdv->pdv_sd = NULL; } @@ -1069,7 +1047,6 @@ static sector_t pscsi_get_blocks(struct se_device *dev) if (pdv->pdv_bd && pdv->pdv_bd->bd_part) return pdv->pdv_bd->bd_part->nr_sects; - dump_stack(); return 0; } From 42ba2c265b085a83a3b18ddb6e087f231c97e02e Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 13 Feb 2017 08:49:20 +1100 Subject: [PATCH 16/25] scsi: lpfc: Add shutdown method for kexec commit 85e8a23936ab3442de0c42da97d53b29f004ece1 upstream. We see lpfc devices regularly fail during kexec. Fix this by adding a shutdown method which mirrors the remove method. Signed-off-by: Anton Blanchard Reviewed-by: Mauricio Faria de Oliveira Tested-by: Mauricio Faria de Oliveira Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 734a0428ef0e..f7e3f27bb5c5 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -11393,6 +11393,7 @@ static struct pci_driver lpfc_driver = { .id_table = lpfc_id_table, .probe = lpfc_pci_probe_one, .remove = lpfc_pci_remove_one, + .shutdown = lpfc_pci_remove_one, .suspend = lpfc_pci_suspend_one, .resume = lpfc_pci_resume_one, .err_handler = &lpfc_err_handler, From de1ff848c74fcc5b172323c1d5fac88dc1ab0281 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Mon, 27 Feb 2017 16:58:36 -0800 Subject: [PATCH 17/25] scsi: libiscsi: add lock around task lists to fix list corruption regression commit 6f8830f5bbab16e54f261de187f3df4644a5b977 upstream. There's a rather long standing regression from the commit "libiscsi: Reduce locking contention in fast path" Depending on iSCSI target behavior, it's possible to hit the case in iscsi_complete_task where the task is still on a pending list (!list_empty(&task->running)). When that happens the task is removed from the list while holding the session back_lock, but other task list modification occur under the frwd_lock. That leads to linked list corruption and eventually a panicked system. Rather than back out the session lock split entirely, in order to try and keep some of the performance gains this patch adds another lock to maintain the task lists integrity. Major enterprise supported kernels have been backing out the lock split for while now, thanks to the efforts at IBM where a lab setup has the most reliable reproducer I've seen on this issue. This patch has been tested there successfully. Signed-off-by: Chris Leech Fixes: 659743b02c41 ("[SCSI] libiscsi: Reduce locking contention in fast path") Reported-by: Prashantha Subbarao Reviewed-by: Guilherme G. Piccoli Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/libiscsi.c | 26 +++++++++++++++++++++++++- include/scsi/libiscsi.h | 1 + 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index f9b6fba689ff..a530f08592cd 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -560,8 +560,12 @@ static void iscsi_complete_task(struct iscsi_task *task, int state) WARN_ON_ONCE(task->state == ISCSI_TASK_FREE); task->state = state; - if (!list_empty(&task->running)) + spin_lock_bh(&conn->taskqueuelock); + if (!list_empty(&task->running)) { + pr_debug_once("%s while task on list", __func__); list_del_init(&task->running); + } + spin_unlock_bh(&conn->taskqueuelock); if (conn->task == task) conn->task = NULL; @@ -783,7 +787,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, if (session->tt->xmit_task(task)) goto free_task; } else { + spin_lock_bh(&conn->taskqueuelock); list_add_tail(&task->running, &conn->mgmtqueue); + spin_unlock_bh(&conn->taskqueuelock); iscsi_conn_queue_work(conn); } @@ -1474,8 +1480,10 @@ void iscsi_requeue_task(struct iscsi_task *task) * this may be on the requeue list already if the xmit_task callout * is handling the r2ts while we are adding new ones */ + spin_lock_bh(&conn->taskqueuelock); if (list_empty(&task->running)) list_add_tail(&task->running, &conn->requeue); + spin_unlock_bh(&conn->taskqueuelock); iscsi_conn_queue_work(conn); } EXPORT_SYMBOL_GPL(iscsi_requeue_task); @@ -1512,22 +1520,26 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) * only have one nop-out as a ping from us and targets should not * overflow us with nop-ins */ + spin_lock_bh(&conn->taskqueuelock); check_mgmt: while (!list_empty(&conn->mgmtqueue)) { conn->task = list_entry(conn->mgmtqueue.next, struct iscsi_task, running); list_del_init(&conn->task->running); + spin_unlock_bh(&conn->taskqueuelock); if (iscsi_prep_mgmt_task(conn, conn->task)) { /* regular RX path uses back_lock */ spin_lock_bh(&conn->session->back_lock); __iscsi_put_task(conn->task); spin_unlock_bh(&conn->session->back_lock); conn->task = NULL; + spin_lock_bh(&conn->taskqueuelock); continue; } rc = iscsi_xmit_task(conn); if (rc) goto done; + spin_lock_bh(&conn->taskqueuelock); } /* process pending command queue */ @@ -1535,19 +1547,24 @@ check_mgmt: conn->task = list_entry(conn->cmdqueue.next, struct iscsi_task, running); list_del_init(&conn->task->running); + spin_unlock_bh(&conn->taskqueuelock); if (conn->session->state == ISCSI_STATE_LOGGING_OUT) { fail_scsi_task(conn->task, DID_IMM_RETRY); + spin_lock_bh(&conn->taskqueuelock); continue; } rc = iscsi_prep_scsi_cmd_pdu(conn->task); if (rc) { if (rc == -ENOMEM || rc == -EACCES) { + spin_lock_bh(&conn->taskqueuelock); list_add_tail(&conn->task->running, &conn->cmdqueue); conn->task = NULL; + spin_unlock_bh(&conn->taskqueuelock); goto done; } else fail_scsi_task(conn->task, DID_ABORT); + spin_lock_bh(&conn->taskqueuelock); continue; } rc = iscsi_xmit_task(conn); @@ -1558,6 +1575,7 @@ check_mgmt: * we need to check the mgmt queue for nops that need to * be sent to aviod starvation */ + spin_lock_bh(&conn->taskqueuelock); if (!list_empty(&conn->mgmtqueue)) goto check_mgmt; } @@ -1577,12 +1595,15 @@ check_mgmt: conn->task = task; list_del_init(&conn->task->running); conn->task->state = ISCSI_TASK_RUNNING; + spin_unlock_bh(&conn->taskqueuelock); rc = iscsi_xmit_task(conn); if (rc) goto done; + spin_lock_bh(&conn->taskqueuelock); if (!list_empty(&conn->mgmtqueue)) goto check_mgmt; } + spin_unlock_bh(&conn->taskqueuelock); spin_unlock_bh(&conn->session->frwd_lock); return -ENODATA; @@ -1738,7 +1759,9 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) goto prepd_reject; } } else { + spin_lock_bh(&conn->taskqueuelock); list_add_tail(&task->running, &conn->cmdqueue); + spin_unlock_bh(&conn->taskqueuelock); iscsi_conn_queue_work(conn); } @@ -2897,6 +2920,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size, INIT_LIST_HEAD(&conn->mgmtqueue); INIT_LIST_HEAD(&conn->cmdqueue); INIT_LIST_HEAD(&conn->requeue); + spin_lock_init(&conn->taskqueuelock); INIT_WORK(&conn->xmitwork, iscsi_xmitworker); /* allocate login_task used for the login/text sequences */ diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 4d1c46aac331..c7b1dc713cdd 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -196,6 +196,7 @@ struct iscsi_conn { struct iscsi_task *task; /* xmit task in progress */ /* xmit */ + spinlock_t taskqueuelock; /* protects the next three lists */ struct list_head mgmtqueue; /* mgmt (control) xmit queue */ struct list_head cmdqueue; /* data-path cmd queue */ struct list_head requeue; /* tasks needing another run */ From 033850b953004b7398466effbaec77bc2e404f6f Mon Sep 17 00:00:00 2001 From: Max Lohrmann Date: Tue, 7 Mar 2017 22:09:56 -0800 Subject: [PATCH 18/25] target: Fix VERIFY_16 handling in sbc_parse_cdb commit 13603685c1f12c67a7a2427f00b63f39a2b6f7c9 upstream. As reported by Max, the Windows 2008 R2 chkdsk utility expects VERIFY_16 to be supported, and does not handle the returned CHECK_CONDITION properly, resulting in an infinite loop. The kernel will log huge amounts of this error: kernel: TARGET_CORE[iSCSI]: Unsupported SCSI Opcode 0x8f, sending CHECK_CONDITION. Signed-off-by: Max Lohrmann Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_sbc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index aabd6602da6c..a53fb23a0411 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -1104,9 +1104,15 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) return ret; break; case VERIFY: + case VERIFY_16: size = 0; - sectors = transport_get_sectors_10(cdb); - cmd->t_task_lba = transport_lba_32(cdb); + if (cdb[0] == VERIFY) { + sectors = transport_get_sectors_10(cdb); + cmd->t_task_lba = transport_lba_32(cdb); + } else { + sectors = transport_get_sectors_16(cdb); + cmd->t_task_lba = transport_lba_64(cdb); + } cmd->execute_cmd = sbc_emulate_noop; goto check_lba; case REZERO_UNIT: From c795d8780cc85403a32e92b5cc55e35cefcaf360 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:39:01 +0100 Subject: [PATCH 19/25] isdn/gigaset: fix NULL-deref at probe commit 68c32f9c2a36d410aa242e661506e5b2c2764179 upstream. Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Fixes: cf7776dc05b8 ("[PATCH] isdn4linux: Siemens Gigaset drivers - direct USB connection") Cc: Hansjoerg Lipp Signed-off-by: Johan Hovold Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/isdn/gigaset/bas-gigaset.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/isdn/gigaset/bas-gigaset.c b/drivers/isdn/gigaset/bas-gigaset.c index aecec6d32463..7f1c625b08ec 100644 --- a/drivers/isdn/gigaset/bas-gigaset.c +++ b/drivers/isdn/gigaset/bas-gigaset.c @@ -2317,6 +2317,9 @@ static int gigaset_probe(struct usb_interface *interface, return -ENODEV; } + if (hostif->desc.bNumEndpoints < 1) + return -ENODEV; + dev_info(&udev->dev, "%s: Device matched (Vendor: 0x%x, Product: 0x%x)\n", __func__, le16_to_cpu(udev->descriptor.idVendor), From a33e71c5f0cbbf9d5f25ebad90ed2b2b117444bb Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 6 Mar 2017 12:58:42 -0500 Subject: [PATCH 20/25] gfs2: Avoid alignment hole in struct lm_lockname commit 28ea06c46fbcab63fd9a55531387b7928a18a590 upstream. Commit 88ffbf3e03 switches to using rhashtables for glocks, hashing over the entire struct lm_lockname instead of its individual fields. On some architectures, struct lm_lockname contains a hole of uninitialized memory due to alignment rules, which now leads to incorrect hash values. Get rid of that hole. Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/incore.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index a6a3389a07fc..51519c2836b5 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -207,7 +207,7 @@ struct lm_lockname { struct gfs2_sbd *ln_sbd; u64 ln_number; unsigned int ln_type; -}; +} __packed __aligned(sizeof(int)); #define lm_name_equal(name1, name2) \ (((name1)->ln_number == (name2)->ln_number) && \ From 3f406ecddf8a8d4b6c2de195336ae1c6b56722c1 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Sat, 25 Feb 2017 13:00:19 -0800 Subject: [PATCH 21/25] percpu: acquire pcpu_lock when updating pcpu_nr_empty_pop_pages commit 320661b08dd6f1746d5c7ab4eb435ec64b97cd45 upstream. Update to pcpu_nr_empty_pop_pages in pcpu_alloc() is currently done without holding pcpu_lock. This can lead to bad updates to the variable. Add missing lock calls. Fixes: b539b87fed37 ("percpu: implmeent pcpu_nr_empty_pop_pages and chunk->nr_populated") Signed-off-by: Tahsin Erdogan Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- mm/percpu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/percpu.c b/mm/percpu.c index 255714302394..f014cebbf405 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1010,8 +1010,11 @@ area_found: mutex_unlock(&pcpu_alloc_mutex); } - if (chunk != pcpu_reserved_chunk) + if (chunk != pcpu_reserved_chunk) { + spin_lock_irqsave(&pcpu_lock, flags); pcpu_nr_empty_pop_pages -= occ_pages; + spin_unlock_irqrestore(&pcpu_lock, flags); + } if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW) pcpu_schedule_balance_work(); From 228514bf243036ab932f7e669db6299023616956 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 1 Mar 2017 15:39:07 -0500 Subject: [PATCH 22/25] cgroup/pids: remove spurious suspicious RCU usage warning commit 1d18c2747f937f1b5ec65ce6bf4ccb9ca1aea9e8 upstream. pids_can_fork() is special in that the css association is guaranteed to be stable throughout the function and thus doesn't need RCU protection around task_css access. When determining the css to charge the pid, task_css_check() is used to override the RCU sanity check. While adding a warning message on fork rejection from pids limit, 135b8b37bd91 ("cgroup: Add pids controller event when fork fails because of pid limit") incorrectly added a task_css access which is neither RCU protected or explicitly annotated. This triggers the following suspicious RCU usage warning when RCU debugging is enabled. cgroup: fork rejected by pids controller in =============================== [ ERR: suspicious RCU usage. ] 4.10.0-work+ #1 Not tainted ------------------------------- ./include/linux/cgroup.h:435 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 0 1 lock held by bash/1748: #0: (&cgroup_threadgroup_rwsem){+++++.}, at: [] _do_fork+0xe6/0x6e0 stack backtrace: CPU: 3 PID: 1748 Comm: bash Not tainted 4.10.0-work+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-1.fc25 04/01/2014 Call Trace: dump_stack+0x68/0x93 lockdep_rcu_suspicious+0xd7/0x110 pids_can_fork+0x1c7/0x1d0 cgroup_can_fork+0x67/0xc0 copy_process.part.58+0x1709/0x1e90 _do_fork+0xe6/0x6e0 SyS_clone+0x19/0x20 do_syscall_64+0x5c/0x140 entry_SYSCALL64_slow_path+0x25/0x25 RIP: 0033:0x7f7853fab93a RSP: 002b:00007ffc12d05c90 EFLAGS: 00000246 ORIG_RAX: 0000000000000038 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f7853fab93a RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000001200011 RBP: 00007ffc12d05cc0 R08: 0000000000000000 R09: 00007f78548db700 R10: 00007f78548db9d0 R11: 0000000000000246 R12: 00000000000006d4 R13: 0000000000000001 R14: 0000000000000000 R15: 000055e3ebe2c04d /asdf There's no reason to dereference task_css again here when the associated css is already available. Fix it by replacing the task_cgroup() call with css->cgroup. Signed-off-by: Tejun Heo Reported-by: Mike Galbraith Fixes: 135b8b37bd91 ("cgroup: Add pids controller event when fork fails because of pid limit") Cc: Kenny Yu Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- kernel/cgroup_pids.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup_pids.c b/kernel/cgroup_pids.c index 2bd673783f1a..a57242e0d5a6 100644 --- a/kernel/cgroup_pids.c +++ b/kernel/cgroup_pids.c @@ -229,7 +229,7 @@ static int pids_can_fork(struct task_struct *task) /* Only log the first time events_limit is incremented. */ if (atomic64_inc_return(&pids->events_limit) == 1) { pr_info("cgroup: fork rejected by pids controller in "); - pr_cont_cgroup_path(task_cgroup(current, pids_cgrp_id)); + pr_cont_cgroup_path(css->cgroup); pr_cont("\n"); } cgroup_file_notify(&pids->events_file); From dea2f1e0c5e9a325ae997b920cdc50bcefae804b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 14 Mar 2017 19:24:19 -0400 Subject: [PATCH 23/25] drm/amdgpu/si: add dpm quirk for Oland MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 18a8de1bc37e97dff1c96ee6cf49adbd02a0f775 upstream. OLAND 0x1002:0x6604 0x1028:0x066F 0x00 seems to have problems with higher sclks. Acked-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/si_dpm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index b447a01ab21a..09e6a7320bb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -3506,6 +3506,12 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, max_sclk = 75000; max_mclk = 80000; } + } else if (adev->asic_type == CHIP_OLAND) { + if ((adev->pdev->device == 0x6604) && + (adev->pdev->subsystem_vendor == 0x1028) && + (adev->pdev->subsystem_device == 0x066F)) { + max_sclk = 75000; + } } /* Apply dpm quirks */ while (p && p->chip_device != 0) { From c2eeabe94145ea1c8b767162ede02cc8f55f4106 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 15 Feb 2017 01:26:39 -0500 Subject: [PATCH 24/25] ext4: fix fencepost in s_first_meta_bg validation commit 2ba3e6e8afc9b6188b471f27cf2b5e3cf34e7af2 upstream. It is OK for s_first_meta_bg to be equal to the number of block group descriptor blocks. (It rarely happens, but it shouldn't cause any problems.) https://bugzilla.kernel.org/show_bug.cgi?id=194567 Fixes: 3a4b77cd47bb837b8557595ec7425f281f2ca1fe Signed-off-by: Theodore Ts'o Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index afe29ba42a4e..5fa9ba1de429 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3830,7 +3830,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); if (ext4_has_feature_meta_bg(sb)) { - if (le32_to_cpu(es->s_first_meta_bg) >= db_count) { + if (le32_to_cpu(es->s_first_meta_bg) > db_count) { ext4_msg(sb, KERN_WARNING, "first meta block group too large: %u " "(group descriptor block count %u)", From 2a486264b736de1c1d938c9c781efbadbf9fd794 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 26 Mar 2017 13:06:11 +0200 Subject: [PATCH 25/25] Linux 4.9.18 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 004f90a4e613..c10d0e634e68 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 17 +SUBLEVEL = 18 EXTRAVERSION = NAME = Roaring Lionus