From 736bb11898ef748da1d805f40d485b66ceac9a3c Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Wed, 1 Jul 2020 08:18:27 +0200 Subject: [PATCH 01/34] modpost: remove use of non-standard strsep() in HOSTCC code strsep() is neither standard C nor POSIX and used outside the kernel code here. Using it here requires that the build host supports it out of the box which is e.g. not true for a Darwin build host and using a cross-compiler. This leads to: scripts/mod/modpost.c:145:2: warning: implicit declaration of function 'strsep' [-Wimplicit-function-declaration] return strsep(stringp, "\n"); ^ and a segfault when running MODPOST. See also: https://stackoverflow.com/a/7219504 So let's replace this by strchr() instead of using strsep(). It does not hurt kernel size or speed since this code is run on the build host. Fixes: ac5100f5432967 ("modpost: add read_text_file() and get_line() helpers") Co-developed-by: Masahiro Yamada Signed-off-by: H. Nikolaus Schaller Signed-off-by: Masahiro Yamada --- scripts/mod/modpost.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 6aea65c65745..45f2ab2ec2d4 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -138,11 +138,19 @@ char *read_text_file(const char *filename) char *get_line(char **stringp) { + char *orig = *stringp, *next; + /* do not return the unwanted extra line at EOF */ - if (*stringp && **stringp == '\0') + if (!orig || *orig == '\0') return NULL; - return strsep(stringp, "\n"); + next = strchr(orig, '\n'); + if (next) + *next++ = '\0'; + + *stringp = next; + + return orig; } /* A list of all modules we processed */ From 2edc66e22ba1af33020ff8b75fe1a2b055cdb73f Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Fri, 3 Jul 2020 19:28:54 +0300 Subject: [PATCH 02/34] habanalabs: block WREG_BULK packet on PDMA WREG_BULK is a special packet that has a variable length. Therefore, we can't parse it when validating CBs that go to the PCI DMA queue. In case the user needs to use it, it can put multiple WREG32 packets instead. Signed-off-by: Oded Gabbay Reviewed-by: Omer Shpigelman --- drivers/misc/habanalabs/gaudi/gaudi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 834470d10b46..e22206527164 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -3865,6 +3865,12 @@ static int gaudi_validate_cb(struct hl_device *hdev, rc = -EPERM; break; + case PACKET_WREG_BULK: + dev_err(hdev->dev, + "User not allowed to use WREG_BULK\n"); + rc = -EPERM; + break; + case PACKET_LOAD_AND_EXE: rc = gaudi_validate_load_and_exe_pkt(hdev, parser, (struct packet_load_and_exe *) user_pkt); @@ -3880,7 +3886,6 @@ static int gaudi_validate_cb(struct hl_device *hdev, break; case PACKET_WREG_32: - case PACKET_WREG_BULK: case PACKET_MSG_LONG: case PACKET_MSG_SHORT: case PACKET_REPEAT: From e38bfd30e08802d9661efffb8c048bd53a3acfc4 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Fri, 3 Jul 2020 20:46:12 +0300 Subject: [PATCH 03/34] habanalabs: set clock gating per engine For debugging purposes, we need to allow the root user better control of the clock gating feature of the DMA and compute engines. Therefore, change the clock gating debugfs interface to be bitmask instead of true/false. Each bit represents a different engine, according to gaudi_engine_id enum. See debugfs documentation for more details. Signed-off-by: Oded Gabbay Reviewed-by: Omer Shpigelman --- .../ABI/testing/debugfs-driver-habanalabs | 11 +- drivers/misc/habanalabs/debugfs.c | 17 +-- drivers/misc/habanalabs/device.c | 2 +- drivers/misc/habanalabs/gaudi/gaudi.c | 112 ++++++++++++------ drivers/misc/habanalabs/goya/goya.c | 8 +- drivers/misc/habanalabs/habanalabs.h | 13 +- drivers/misc/habanalabs/habanalabs_drv.c | 2 +- 7 files changed, 103 insertions(+), 62 deletions(-) diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs index f6d9c2a8d528..2e9ae311e02d 100644 --- a/Documentation/ABI/testing/debugfs-driver-habanalabs +++ b/Documentation/ABI/testing/debugfs-driver-habanalabs @@ -16,7 +16,16 @@ Description: Allow the root user to disable/enable in runtime the clock gating mechanism in Gaudi. Due to how Gaudi is built, the clock gating needs to be disabled in order to access the registers of the TPC and MME engines. This is sometimes needed - during debug by the user and hence the user needs this option + during debug by the user and hence the user needs this option. + The user can supply a bitmask value, each bit represents + a different engine to disable/enable its clock gating feature. + The bitmask is composed of 20 bits: + 0 - 7 : DMA channels + 8 - 11 : MME engines + 12 - 19 : TPC engines + The bit's location of a specific engine can be determined + using (1 << GAUDI_ENGINE_ID_*). GAUDI_ENGINE_ID_* values + are defined in uapi habanalabs.h file in enum gaudi_engine_id What: /sys/kernel/debug/habanalabs/hl/command_buffers Date: Jan 2019 diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c index fc4372c18ce2..136b8f6fa0b3 100644 --- a/drivers/misc/habanalabs/debugfs.c +++ b/drivers/misc/habanalabs/debugfs.c @@ -981,7 +981,7 @@ static ssize_t hl_clk_gate_read(struct file *f, char __user *buf, if (*ppos) return 0; - sprintf(tmp_buf, "%d\n", hdev->clock_gating); + sprintf(tmp_buf, "0x%llx\n", hdev->clock_gating_mask); rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf, strlen(tmp_buf) + 1); @@ -993,7 +993,7 @@ static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf, { struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; - u32 value; + u64 value; ssize_t rc; if (atomic_read(&hdev->in_reset)) { @@ -1002,19 +1002,12 @@ static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf, return 0; } - rc = kstrtouint_from_user(buf, count, 10, &value); + rc = kstrtoull_from_user(buf, count, 16, &value); if (rc) return rc; - if (value) { - hdev->clock_gating = 1; - if (hdev->asic_funcs->enable_clock_gating) - hdev->asic_funcs->enable_clock_gating(hdev); - } else { - if (hdev->asic_funcs->disable_clock_gating) - hdev->asic_funcs->disable_clock_gating(hdev); - hdev->clock_gating = 0; - } + hdev->clock_gating_mask = value; + hdev->asic_funcs->set_clock_gating(hdev); return count; } diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index 2b38a119704c..59608d1bac88 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -608,7 +608,7 @@ int hl_device_set_debug_mode(struct hl_device *hdev, bool enable) hdev->in_debug = 0; if (!hdev->hard_reset_pending) - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); goto out; } diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index e22206527164..9d6aebef8854 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -98,6 +98,11 @@ #define GAUDI_ARB_WDT_TIMEOUT 0x1000000 +#define GAUDI_CLK_GATE_DEBUGFS_MASK (\ + BIT(GAUDI_ENGINE_ID_MME_0) |\ + BIT(GAUDI_ENGINE_ID_MME_2) |\ + GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0)) + static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = { "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3", "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3", @@ -106,14 +111,14 @@ static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = { }; static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = { - [GAUDI_PCI_DMA_1] = 0, - [GAUDI_PCI_DMA_2] = 1, - [GAUDI_PCI_DMA_3] = 5, - [GAUDI_HBM_DMA_1] = 2, - [GAUDI_HBM_DMA_2] = 3, - [GAUDI_HBM_DMA_3] = 4, - [GAUDI_HBM_DMA_4] = 6, - [GAUDI_HBM_DMA_5] = 7 + [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0, + [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1, + [GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5, + [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2, + [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3, + [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4, + [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6, + [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7 }; static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = { @@ -1819,7 +1824,7 @@ static void gaudi_init_golden_registers(struct hl_device *hdev) gaudi_init_rate_limiter(hdev); - gaudi_disable_clock_gating(hdev); + hdev->asic_funcs->disable_clock_gating(hdev); for (tpc_id = 0, tpc_offset = 0; tpc_id < TPC_NUMBER_OF_ENGINES; @@ -2531,46 +2536,55 @@ static void gaudi_tpc_stall(struct hl_device *hdev) WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); } -static void gaudi_enable_clock_gating(struct hl_device *hdev) +static void gaudi_set_clock_gating(struct hl_device *hdev) { struct gaudi_device *gaudi = hdev->asic_specific; u32 qman_offset; int i; - if (!hdev->clock_gating) - return; - - if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) - return; - /* In case we are during debug session, don't enable the clock gate * as it may interfere */ if (hdev->in_debug) return; - for (i = 0, qman_offset = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) { + for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) { + if (!(hdev->clock_gating_mask & + (BIT_ULL(gaudi_dma_assignment[i])))) + continue; + qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET; WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN); WREG32(mmDMA0_QM_CGM_CFG + qman_offset, QMAN_UPPER_CP_CGM_PWR_GATE_EN); } - for (; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) { + for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) { + if (!(hdev->clock_gating_mask & + (BIT_ULL(gaudi_dma_assignment[i])))) + continue; + qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET; WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN); WREG32(mmDMA0_QM_CGM_CFG + qman_offset, QMAN_COMMON_CP_CGM_PWR_GATE_EN); } - WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); - WREG32(mmMME0_QM_CGM_CFG, - QMAN_COMMON_CP_CGM_PWR_GATE_EN); - WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); - WREG32(mmMME2_QM_CGM_CFG, - QMAN_COMMON_CP_CGM_PWR_GATE_EN); + if (hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0))) { + WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); + WREG32(mmMME0_QM_CGM_CFG, QMAN_COMMON_CP_CGM_PWR_GATE_EN); + } + + if (hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2))) { + WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); + WREG32(mmMME2_QM_CGM_CFG, QMAN_COMMON_CP_CGM_PWR_GATE_EN); + } for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { + if (!(hdev->clock_gating_mask & + (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)))) + continue; + WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN); WREG32(mmTPC0_QM_CGM_CFG + qman_offset, @@ -2663,7 +2677,7 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset) gaudi_stop_hbm_dma_qmans(hdev); gaudi_stop_pci_dma_qmans(hdev); - gaudi_disable_clock_gating(hdev); + hdev->asic_funcs->disable_clock_gating(hdev); msleep(wait_timeout_ms); @@ -3003,7 +3017,7 @@ static int gaudi_hw_init(struct hl_device *hdev) gaudi_init_tpc_qmans(hdev); - gaudi_enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); gaudi_enable_timestamp(hdev); @@ -3112,7 +3126,9 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset) HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER | - HW_CAP_HBM_SCRAMBLER); + HW_CAP_HBM_SCRAMBLER | + HW_CAP_CLK_GATE); + memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat)); } @@ -4526,13 +4542,18 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val) int rc = 0; if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { - if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { + + if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && + (hdev->clock_gating_mask & + GAUDI_CLK_GATE_DEBUGFS_MASK)) { + dev_err_ratelimited(hdev->dev, "Can't read register - clock gating is enabled!\n"); rc = -EFAULT; } else { *val = RREG32(addr - CFG_BASE); } + } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { *val = readl(hdev->pcie_bar[SRAM_BAR_ID] + @@ -4568,13 +4589,18 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val) int rc = 0; if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { - if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { + + if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && + (hdev->clock_gating_mask & + GAUDI_CLK_GATE_DEBUGFS_MASK)) { + dev_err_ratelimited(hdev->dev, "Can't write register - clock gating is enabled!\n"); rc = -EFAULT; } else { WREG32(addr - CFG_BASE, val); } + } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { writel(val, hdev->pcie_bar[SRAM_BAR_ID] + @@ -4610,7 +4636,11 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val) int rc = 0; if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { - if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { + + if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && + (hdev->clock_gating_mask & + GAUDI_CLK_GATE_DEBUGFS_MASK)) { + dev_err_ratelimited(hdev->dev, "Can't read register - clock gating is enabled!\n"); rc = -EFAULT; @@ -4620,6 +4650,7 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val) *val = (((u64) val_h) << 32) | val_l; } + } else if ((addr >= SRAM_BASE_ADDR) && (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { *val = readq(hdev->pcie_bar[SRAM_BAR_ID] + @@ -4656,7 +4687,11 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val) int rc = 0; if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { - if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { + + if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && + (hdev->clock_gating_mask & + GAUDI_CLK_GATE_DEBUGFS_MASK)) { + dev_err_ratelimited(hdev->dev, "Can't write register - clock gating is enabled!\n"); rc = -EFAULT; @@ -4665,6 +4700,7 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val) WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val)); } + } else if ((addr >= SRAM_BASE_ADDR) && (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { writeq(val, hdev->pcie_bar[SRAM_BAR_ID] + @@ -4886,7 +4922,7 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid); gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid); - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); } @@ -5267,7 +5303,7 @@ static void gaudi_print_ecc_info_generic(struct hl_device *hdev, } if (disable_clock_gating) { - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); } } @@ -5754,7 +5790,7 @@ static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id, /* Clear interrupts */ WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0); - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); @@ -6270,7 +6306,7 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask, if (s) seq_puts(s, "\n"); - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); @@ -6371,7 +6407,7 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, dev_err(hdev->dev, "Timeout while waiting for TPC%d icache prefetch\n", tpc_id); - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); return -EIO; } @@ -6400,7 +6436,7 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, 1000, kernel_timeout); - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); if (rc) { @@ -6741,7 +6777,7 @@ static const struct hl_asic_funcs gaudi_funcs = { .mmu_invalidate_cache = gaudi_mmu_invalidate_cache, .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range, .send_heartbeat = gaudi_send_heartbeat, - .enable_clock_gating = gaudi_enable_clock_gating, + .set_clock_gating = gaudi_set_clock_gating, .disable_clock_gating = gaudi_disable_clock_gating, .debug_coresight = gaudi_debug_coresight, .is_device_idle = gaudi_is_device_idle, diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 0d2952bb58df..83f0c70f140b 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -5028,14 +5028,14 @@ int goya_armcp_info_get(struct hl_device *hdev) return 0; } -static void goya_enable_clock_gating(struct hl_device *hdev) +static void goya_set_clock_gating(struct hl_device *hdev) { - + /* clock gating not supported in Goya */ } static void goya_disable_clock_gating(struct hl_device *hdev) { - + /* clock gating not supported in Goya */ } static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask, @@ -5259,7 +5259,7 @@ static const struct hl_asic_funcs goya_funcs = { .mmu_invalidate_cache = goya_mmu_invalidate_cache, .mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range, .send_heartbeat = goya_send_heartbeat, - .enable_clock_gating = goya_enable_clock_gating, + .set_clock_gating = goya_set_clock_gating, .disable_clock_gating = goya_disable_clock_gating, .debug_coresight = goya_debug_coresight, .is_device_idle = goya_is_device_idle, diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 1ecdcf8b763a..dee5cc25fe5b 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -578,8 +578,9 @@ enum hl_pll_frequency { * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with * ASID-VA-size mask. * @send_heartbeat: send is-alive packet to ArmCP and verify response. - * @enable_clock_gating: enable clock gating for reducing power consumption. - * @disable_clock_gating: disable clock for accessing registers on HBW. + * @set_clock_gating: enable/disable clock gating per engine according to + * clock gating mask in hdev + * @disable_clock_gating: disable clock gating completely * @debug_coresight: perform certain actions on Coresight for debugging. * @is_device_idle: return true if device is idle, false otherwise. * @soft_reset_late_init: perform certain actions needed after soft reset. @@ -680,7 +681,7 @@ struct hl_asic_funcs { int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard, u32 asid, u64 va, u64 size); int (*send_heartbeat)(struct hl_device *hdev); - void (*enable_clock_gating)(struct hl_device *hdev); + void (*set_clock_gating)(struct hl_device *hdev); void (*disable_clock_gating)(struct hl_device *hdev); int (*debug_coresight)(struct hl_device *hdev, void *data); bool (*is_device_idle)(struct hl_device *hdev, u32 *mask, @@ -1398,6 +1399,9 @@ struct hl_device_idle_busy_ts { * @max_power: the max power of the device, as configured by the sysadmin. This * value is saved so in case of hard-reset, the driver will restore * this value and update the F/W after the re-initialization + * @clock_gating_mask: is clock gating enabled. bitmask that represents the + * different engines. See debugfs-driver-habanalabs for + * details. * @in_reset: is device in reset flow. * @curr_pll_profile: current PLL profile. * @cs_active_cnt: number of active command submissions on this device (active @@ -1425,7 +1429,6 @@ struct hl_device_idle_busy_ts { * @init_done: is the initialization of the device done. * @mmu_enable: is MMU enabled. * @mmu_huge_page_opt: is MMU huge pages optimization enabled. - * @clock_gating: is clock gating enabled. * @device_cpu_disabled: is the device CPU disabled (due to timeouts) * @dma_mask: the dma mask that was set for this device * @in_debug: is device under debug. This, together with fpriv_list, enforces @@ -1493,6 +1496,7 @@ struct hl_device { atomic64_t dram_used_mem; u64 timeout_jiffies; u64 max_power; + u64 clock_gating_mask; atomic_t in_reset; enum hl_pll_frequency curr_pll_profile; int cs_active_cnt; @@ -1514,7 +1518,6 @@ struct hl_device { u8 dram_default_page_mapping; u8 pmmu_huge_range; u8 init_done; - u8 clock_gating; u8 device_cpu_disabled; u8 dma_mask; u8 in_debug; diff --git a/drivers/misc/habanalabs/habanalabs_drv.c b/drivers/misc/habanalabs/habanalabs_drv.c index 8652c7e5d7f1..22716da9f85f 100644 --- a/drivers/misc/habanalabs/habanalabs_drv.c +++ b/drivers/misc/habanalabs/habanalabs_drv.c @@ -232,7 +232,7 @@ static void set_driver_behavior_per_device(struct hl_device *hdev) hdev->fw_loading = 1; hdev->cpu_queues_enable = 1; hdev->heartbeat = 1; - hdev->clock_gating = 1; + hdev->clock_gating_mask = ULONG_MAX; hdev->reset_pcilink = 0; hdev->axi_drain = 0; From 788cacf308871db0a619952321bedfec8f1773e2 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Tue, 7 Jul 2020 17:30:13 +0300 Subject: [PATCH 04/34] habanalabs: set 4s timeout for message to device CPU We see that sometimes the CPU in GOYA and GAUDI is occupied by the power/thermal loop and can't answer requests from the driver fast enough. Therefore, to avoid false notifications on timeouts, increase the timeout to 4 seconds on each message sent to the device CPU. Signed-off-by: Oded Gabbay Reviewed-by: Tomer Tayar --- drivers/misc/habanalabs/debugfs.c | 6 +++--- drivers/misc/habanalabs/firmware_if.c | 10 +++++----- drivers/misc/habanalabs/gaudi/gaudi.c | 4 ++++ drivers/misc/habanalabs/goya/goya.c | 12 ++++++++---- drivers/misc/habanalabs/habanalabs.h | 6 +++++- drivers/misc/habanalabs/hwmon.c | 19 +++++++++---------- drivers/misc/habanalabs/sysfs.c | 11 ++++------- 7 files changed, 38 insertions(+), 30 deletions(-) diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c index 136b8f6fa0b3..0bc036e01ee8 100644 --- a/drivers/misc/habanalabs/debugfs.c +++ b/drivers/misc/habanalabs/debugfs.c @@ -36,7 +36,7 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, pkt.i2c_reg = i2c_reg; rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_DEVICE_TIMEOUT_USEC, (long *) val); + 0, (long *) val); if (rc) dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc); @@ -63,7 +63,7 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, pkt.value = cpu_to_le64(val); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_DEVICE_TIMEOUT_USEC, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, "Failed to write to I2C, error %d\n", rc); @@ -87,7 +87,7 @@ static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state) pkt.value = cpu_to_le64(state); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_DEVICE_TIMEOUT_USEC, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, "Failed to set LED %d, error %d\n", led, rc); diff --git a/drivers/misc/habanalabs/firmware_if.c b/drivers/misc/habanalabs/firmware_if.c index baf790cf4b78..d27841cb5bcb 100644 --- a/drivers/misc/habanalabs/firmware_if.c +++ b/drivers/misc/habanalabs/firmware_if.c @@ -61,7 +61,7 @@ int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode) pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT); return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, - sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL); + sizeof(pkt), 0, NULL); } int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, @@ -144,7 +144,7 @@ int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type) pkt.value = cpu_to_le64(event_type); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_DEVICE_TIMEOUT_USEC, &result); + 0, &result); if (rc) dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type); @@ -183,7 +183,7 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr, ARMCP_PKT_CTL_OPCODE_SHIFT); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt, - total_pkt_size, HL_DEVICE_TIMEOUT_USEC, &result); + total_pkt_size, 0, &result); if (rc) dev_err(hdev->dev, "failed to unmask IRQ array\n"); @@ -204,7 +204,7 @@ int hl_fw_test_cpu_queue(struct hl_device *hdev) test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt, - sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result); + sizeof(test_pkt), 0, &result); if (!rc) { if (result != ARMCP_PACKET_FENCE_VAL) @@ -248,7 +248,7 @@ int hl_fw_send_heartbeat(struct hl_device *hdev) hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt, - sizeof(hb_pkt), HL_DEVICE_TIMEOUT_USEC, &result); + sizeof(hb_pkt), 0, &result); if ((rc) || (result != ARMCP_PACKET_FENCE_VAL)) rc = -EIO; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 9d6aebef8854..637a9d608707 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -80,6 +80,7 @@ #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */ +#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */ #define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9 @@ -3479,6 +3480,9 @@ static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg, return 0; } + if (!timeout) + timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC; + return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len, timeout, result); } diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 83f0c70f140b..88460b2138d8 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -88,6 +88,7 @@ #define GOYA_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100) #define GOYA_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) #define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */ +#define GOYA_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */ #define GOYA_QMAN0_FENCE_VAL 0xD169B243 @@ -2830,6 +2831,9 @@ int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len, return 0; } + if (!timeout) + timeout = GOYA_MSG_TO_CPU_TIMEOUT_USEC; + return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len, timeout, result); } @@ -4431,8 +4435,8 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr, pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY << ARMCP_PKT_CTL_OPCODE_SHIFT); - rc = goya_send_cpu_message(hdev, (u32 *) pkt, total_pkt_size, - HL_DEVICE_TIMEOUT_USEC, &result); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt, + total_pkt_size, 0, &result); if (rc) dev_err(hdev->dev, "failed to unmask IRQ array\n"); @@ -4464,8 +4468,8 @@ static int goya_unmask_irq(struct hl_device *hdev, u16 event_type) ARMCP_PKT_CTL_OPCODE_SHIFT); pkt.value = cpu_to_le64(event_type); - rc = goya_send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_DEVICE_TIMEOUT_USEC, &result); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + 0, &result); if (rc) dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type); diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index dee5cc25fe5b..194d83352696 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -588,7 +588,11 @@ enum hl_pll_frequency { * @hw_queues_unlock: release H/W queues lock. * @get_pci_id: retrieve PCI ID. * @get_eeprom_data: retrieve EEPROM data from F/W. - * @send_cpu_message: send buffer to ArmCP. + * @send_cpu_message: send message to F/W. If the message is timedout, the + * driver will eventually reset the device. The timeout can + * be determined by the calling function or it can be 0 and + * then the timeout is the default timeout for the specific + * ASIC * @get_hw_state: retrieve the H/W state * @pci_bars_map: Map PCI BARs. * @set_dram_bar_base: Set DRAM BAR to map specific device address. Returns diff --git a/drivers/misc/habanalabs/hwmon.c b/drivers/misc/habanalabs/hwmon.c index 8c6cd77e6af6..b997336fa75f 100644 --- a/drivers/misc/habanalabs/hwmon.c +++ b/drivers/misc/habanalabs/hwmon.c @@ -10,7 +10,6 @@ #include #include -#define SENSORS_PKT_TIMEOUT 1000000 /* 1s */ #define HWMON_NR_SENSOR_TYPES (hwmon_pwm + 1) int hl_build_hwmon_channel_info(struct hl_device *hdev, @@ -323,7 +322,7 @@ int hl_get_temperature(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, value); + 0, value); if (rc) { dev_err(hdev->dev, @@ -350,7 +349,7 @@ int hl_set_temperature(struct hl_device *hdev, pkt.value = __cpu_to_le64(value); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, @@ -374,7 +373,7 @@ int hl_get_voltage(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, value); + 0, value); if (rc) { dev_err(hdev->dev, @@ -400,7 +399,7 @@ int hl_get_current(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, value); + 0, value); if (rc) { dev_err(hdev->dev, @@ -426,7 +425,7 @@ int hl_get_fan_speed(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, value); + 0, value); if (rc) { dev_err(hdev->dev, @@ -452,7 +451,7 @@ int hl_get_pwm_info(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, value); + 0, value); if (rc) { dev_err(hdev->dev, @@ -479,7 +478,7 @@ void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, pkt.value = cpu_to_le64(value); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, @@ -502,7 +501,7 @@ int hl_set_voltage(struct hl_device *hdev, pkt.value = __cpu_to_le64(value); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, @@ -527,7 +526,7 @@ int hl_set_current(struct hl_device *hdev, pkt.value = __cpu_to_le64(value); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, diff --git a/drivers/misc/habanalabs/sysfs.c b/drivers/misc/habanalabs/sysfs.c index 5d78d5e1c782..70b6b1863c2e 100644 --- a/drivers/misc/habanalabs/sysfs.c +++ b/drivers/misc/habanalabs/sysfs.c @@ -9,9 +9,6 @@ #include -#define SET_CLK_PKT_TIMEOUT 1000000 /* 1s */ -#define SET_PWR_PKT_TIMEOUT 1000000 /* 1s */ - long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) { struct armcp_packet pkt; @@ -29,7 +26,7 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) pkt.pll_index = cpu_to_le32(pll_index); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SET_CLK_PKT_TIMEOUT, &result); + 0, &result); if (rc) { dev_err(hdev->dev, @@ -54,7 +51,7 @@ void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) pkt.value = cpu_to_le64(freq); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SET_CLK_PKT_TIMEOUT, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, @@ -74,7 +71,7 @@ u64 hl_get_max_power(struct hl_device *hdev) ARMCP_PKT_CTL_OPCODE_SHIFT); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SET_PWR_PKT_TIMEOUT, &result); + 0, &result); if (rc) { dev_err(hdev->dev, "Failed to get max power, error %d\n", rc); @@ -96,7 +93,7 @@ void hl_set_max_power(struct hl_device *hdev, u64 value) pkt.value = cpu_to_le64(value); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SET_PWR_PKT_TIMEOUT, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, "Failed to set max power, error %d\n", rc); From 20b1be59528295e5c2a8812059b8560753dd8e68 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 8 Jul 2020 01:35:08 +0900 Subject: [PATCH 05/34] kbuild: fix single target builds for external modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit f566e1fbadb6 ("kbuild: make multiple directory targets work") broke single target builds for external modules. Fix this. Fixes: f566e1fbadb6 ("kbuild: make multiple directory targets work") Reported-by: Bjørn Mork Signed-off-by: Masahiro Yamada Tested-by: Bjørn Mork --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index fe0164a654c7..676f1cfb1d56 100644 --- a/Makefile +++ b/Makefile @@ -1754,7 +1754,7 @@ PHONY += descend $(build-dirs) descend: $(build-dirs) $(build-dirs): prepare $(Q)$(MAKE) $(build)=$@ \ - single-build=$(if $(filter-out $@/, $(filter $@/%, $(single-no-ko))),1) \ + single-build=$(if $(filter-out $@/, $(filter $@/%, $(KBUILD_SINGLE_TARGETS))),1) \ need-builtin=1 need-modorder=1 clean-dirs := $(addprefix _clean_, $(clean-dirs)) From e19485dc7a0d210b428a249c0595769bd495fb71 Mon Sep 17 00:00:00 2001 From: Xu Yilun Date: Mon, 13 Jul 2020 14:10:02 +0800 Subject: [PATCH 06/34] fpga: dfl: pci: reduce the scope of variable 'ret' This is to fix lkp cppcheck warnings: drivers/fpga/dfl-pci.c:230:6: warning: The scope of the variable 'ret' can be reduced. [variableScope] int ret = 0; ^ drivers/fpga/dfl-pci.c:230:10: warning: Variable 'ret' is assigned a value that is never used. [unreadVariable] int ret = 0; ^ Fixes: 3c2760b78f90 ("fpga: dfl: pci: fix return value of cci_pci_sriov_configure") Reported-by: kbuild test robot Signed-off-by: Xu Yilun Acked-by: Wu Hao Reviewed-by: Tom Rix Signed-off-by: Moritz Fischer --- drivers/fpga/dfl-pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/fpga/dfl-pci.c b/drivers/fpga/dfl-pci.c index 538755062ab7..a78c409bf2c4 100644 --- a/drivers/fpga/dfl-pci.c +++ b/drivers/fpga/dfl-pci.c @@ -227,7 +227,6 @@ static int cci_pci_sriov_configure(struct pci_dev *pcidev, int num_vfs) { struct cci_drvdata *drvdata = pci_get_drvdata(pcidev); struct dfl_fpga_cdev *cdev = drvdata->cdev; - int ret = 0; if (!num_vfs) { /* @@ -239,6 +238,8 @@ static int cci_pci_sriov_configure(struct pci_dev *pcidev, int num_vfs) dfl_fpga_cdev_config_ports_pf(cdev); } else { + int ret; + /* * before enable SRIOV, put released ports into VF access mode * first of all. From 8614afd689df59d9ce019439389be20bd788a897 Mon Sep 17 00:00:00 2001 From: Matthew Gerlach Date: Mon, 13 Jul 2020 14:10:03 +0800 Subject: [PATCH 07/34] fpga: dfl: fix bug in port reset handshake When putting the port in reset, driver must wait for the soft reset acknowledgment bit instead of the soft reset bit. Fixes: 47c1b19c160f (fpga: dfl: afu: add port ops support) Signed-off-by: Matthew Gerlach Signed-off-by: Xu Yilun Acked-by: Wu Hao Reviewed-by: Tom Rix Signed-off-by: Moritz Fischer --- drivers/fpga/dfl-afu-main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/fpga/dfl-afu-main.c b/drivers/fpga/dfl-afu-main.c index b0c31789a909..3fa2c5992173 100644 --- a/drivers/fpga/dfl-afu-main.c +++ b/drivers/fpga/dfl-afu-main.c @@ -83,7 +83,8 @@ int __afu_port_disable(struct platform_device *pdev) * on this port and minimum soft reset pulse width has elapsed. * Driver polls port_soft_reset_ack to determine if reset done by HW. */ - if (readq_poll_timeout(base + PORT_HDR_CTRL, v, v & PORT_CTRL_SFTRST, + if (readq_poll_timeout(base + PORT_HDR_CTRL, v, + v & PORT_CTRL_SFTRST_ACK, RST_POLL_INVL, RST_POLL_TIMEOUT)) { dev_err(&pdev->dev, "timeout, fail to reset device\n"); return -ETIMEDOUT; From cea7a0449ea3fa4883bf5dc8397f000d6b67d6cd Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 12 Jul 2020 23:34:57 +0300 Subject: [PATCH 08/34] habanalabs: prevent possible out-of-bounds array access Queue index is received from the user. Therefore, we must validate it before using it to access the queue props array. Signed-off-by: Oded Gabbay Reviewed-by: Tomer Tayar --- drivers/misc/habanalabs/command_submission.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c index b0f62cbbdc87..f3a8f113865d 100644 --- a/drivers/misc/habanalabs/command_submission.c +++ b/drivers/misc/habanalabs/command_submission.c @@ -499,11 +499,19 @@ static int validate_queue_index(struct hl_device *hdev, struct asic_fixed_properties *asic = &hdev->asic_prop; struct hw_queue_properties *hw_queue_prop; + /* This must be checked here to prevent out-of-bounds access to + * hw_queues_props array + */ + if (chunk->queue_index >= HL_MAX_QUEUES) { + dev_err(hdev->dev, "Queue index %d is invalid\n", + chunk->queue_index); + return -EINVAL; + } + hw_queue_prop = &asic->hw_queues_props[chunk->queue_index]; - if ((chunk->queue_index >= HL_MAX_QUEUES) || - (hw_queue_prop->type == QUEUE_TYPE_NA)) { - dev_err(hdev->dev, "Queue index %d is invalid\n", + if (hw_queue_prop->type == QUEUE_TYPE_NA) { + dev_err(hdev->dev, "Queue index %d is not applicable\n", chunk->queue_index); return -EINVAL; } From f07804ec77d77f8a9dcf570a24154e17747bc82f Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Fri, 17 Jul 2020 15:52:54 +0100 Subject: [PATCH 09/34] staging: comedi: ni_6527: fix INSN_CONFIG_DIGITAL_TRIG support `ni6527_intr_insn_config()` processes `INSN_CONFIG` comedi instructions for the "interrupt" subdevice. When `data[0]` is `INSN_CONFIG_DIGITAL_TRIG` it is configuring the digital trigger. When `data[2]` is `COMEDI_DIGITAL_TRIG_ENABLE_EDGES` it is configuring rising and falling edge detection for the digital trigger, using a base channel number (or shift amount) in `data[3]`, a rising edge bitmask in `data[4]` and falling edge bitmask in `data[5]`. If the base channel number (shift amount) is greater than or equal to the number of channels (24) of the digital input subdevice, there are no changes to the rising and falling edges, so the mask of channels to be changed can be set to 0, otherwise the mask of channels to be changed, and the rising and falling edge bitmasks are shifted by the base channel number before calling `ni6527_set_edge_detection()` to change the appropriate registers. Unfortunately, the code is comparing the base channel (shift amount) to the interrupt subdevice's number of channels (1) instead of the digital input subdevice's number of channels (24). Fix it by comparing to 32 because all shift amounts for an `unsigned int` must be less than that and everything from bit 24 upwards is ignored by `ni6527_set_edge_detection()` anyway. Fixes: 110f9e687c1a8 ("staging: comedi: ni_6527: support INSN_CONFIG_DIGITAL_TRIG") Cc: # 3.17+ Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20200717145257.112660-2-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/ni_6527.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/ni_6527.c b/drivers/staging/comedi/drivers/ni_6527.c index 4d1eccb5041d..4518c2680b7c 100644 --- a/drivers/staging/comedi/drivers/ni_6527.c +++ b/drivers/staging/comedi/drivers/ni_6527.c @@ -332,7 +332,7 @@ static int ni6527_intr_insn_config(struct comedi_device *dev, case COMEDI_DIGITAL_TRIG_ENABLE_EDGES: /* check shift amount */ shift = data[3]; - if (shift >= s->n_chan) { + if (shift >= 32) { mask = 0; rising = 0; falling = 0; From 0bd0db42a030b75c20028c7ba6e327b9cb554116 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Fri, 17 Jul 2020 15:52:55 +0100 Subject: [PATCH 10/34] staging: comedi: addi_apci_1032: check INSN_CONFIG_DIGITAL_TRIG shift The `INSN_CONFIG` comedi instruction with sub-instruction code `INSN_CONFIG_DIGITAL_TRIG` includes a base channel in `data[3]`. This is used as a right shift amount for other bitmask values without being checked. Shift amounts greater than or equal to 32 will result in undefined behavior. Add code to deal with this. Fixes: 33cdce6293dcc ("staging: comedi: addi_apci_1032: conform to new INSN_CONFIG_DIGITAL_TRIG") Cc: #3.8+ Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20200717145257.112660-3-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- .../staging/comedi/drivers/addi_apci_1032.c | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/staging/comedi/drivers/addi_apci_1032.c b/drivers/staging/comedi/drivers/addi_apci_1032.c index 560649be9d13..e035c9f757a1 100644 --- a/drivers/staging/comedi/drivers/addi_apci_1032.c +++ b/drivers/staging/comedi/drivers/addi_apci_1032.c @@ -106,14 +106,22 @@ static int apci1032_cos_insn_config(struct comedi_device *dev, unsigned int *data) { struct apci1032_private *devpriv = dev->private; - unsigned int shift, oldmask; + unsigned int shift, oldmask, himask, lomask; switch (data[0]) { case INSN_CONFIG_DIGITAL_TRIG: if (data[1] != 0) return -EINVAL; shift = data[3]; - oldmask = (1U << shift) - 1; + if (shift < 32) { + oldmask = (1U << shift) - 1; + himask = data[4] << shift; + lomask = data[5] << shift; + } else { + oldmask = 0xffffffffu; + himask = 0; + lomask = 0; + } switch (data[2]) { case COMEDI_DIGITAL_TRIG_DISABLE: devpriv->ctrl = 0; @@ -136,8 +144,8 @@ static int apci1032_cos_insn_config(struct comedi_device *dev, devpriv->mode2 &= oldmask; } /* configure specified channels */ - devpriv->mode1 |= data[4] << shift; - devpriv->mode2 |= data[5] << shift; + devpriv->mode1 |= himask; + devpriv->mode2 |= lomask; break; case COMEDI_DIGITAL_TRIG_ENABLE_LEVELS: if (devpriv->ctrl != (APCI1032_CTRL_INT_ENA | @@ -154,8 +162,8 @@ static int apci1032_cos_insn_config(struct comedi_device *dev, devpriv->mode2 &= oldmask; } /* configure specified channels */ - devpriv->mode1 |= data[4] << shift; - devpriv->mode2 |= data[5] << shift; + devpriv->mode1 |= himask; + devpriv->mode2 |= lomask; break; default: return -EINVAL; From fc846e9db67c7e808d77bf9e2ef3d49e3820ce5d Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Fri, 17 Jul 2020 15:52:57 +0100 Subject: [PATCH 11/34] staging: comedi: addi_apci_1500: check INSN_CONFIG_DIGITAL_TRIG shift The `INSN_CONFIG` comedi instruction with sub-instruction code `INSN_CONFIG_DIGITAL_TRIG` includes a base channel in `data[3]`. This is used as a right shift amount for other bitmask values without being checked. Shift amounts greater than or equal to 32 will result in undefined behavior. Add code to deal with this, adjusting the checks for invalid channels so that enabled channel bits that would have been lost by shifting are also checked for validity. Only channels 0 to 15 are valid. Fixes: a8c66b684efaf ("staging: comedi: addi_apci_1500: rewrite the subdevice support functions") Cc: #4.0+: ef75e14a6c93: staging: comedi: verify array index is correct before using it Cc: #4.0+ Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20200717145257.112660-5-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- .../staging/comedi/drivers/addi_apci_1500.c | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/staging/comedi/drivers/addi_apci_1500.c b/drivers/staging/comedi/drivers/addi_apci_1500.c index 689acd69a1b9..816dd25b9d0e 100644 --- a/drivers/staging/comedi/drivers/addi_apci_1500.c +++ b/drivers/staging/comedi/drivers/addi_apci_1500.c @@ -452,13 +452,14 @@ static int apci1500_di_cfg_trig(struct comedi_device *dev, struct apci1500_private *devpriv = dev->private; unsigned int trig = data[1]; unsigned int shift = data[3]; - unsigned int hi_mask = data[4] << shift; - unsigned int lo_mask = data[5] << shift; - unsigned int chan_mask = hi_mask | lo_mask; - unsigned int old_mask = (1 << shift) - 1; + unsigned int hi_mask; + unsigned int lo_mask; + unsigned int chan_mask; + unsigned int old_mask; unsigned int pm; unsigned int pt; unsigned int pp; + unsigned int invalid_chan; if (trig > 1) { dev_dbg(dev->class_dev, @@ -466,7 +467,20 @@ static int apci1500_di_cfg_trig(struct comedi_device *dev, return -EINVAL; } - if (chan_mask > 0xffff) { + if (shift <= 16) { + hi_mask = data[4] << shift; + lo_mask = data[5] << shift; + old_mask = (1U << shift) - 1; + invalid_chan = (data[4] | data[5]) >> (16 - shift); + } else { + hi_mask = 0; + lo_mask = 0; + old_mask = 0xffff; + invalid_chan = data[4] | data[5]; + } + chan_mask = hi_mask | lo_mask; + + if (invalid_chan) { dev_dbg(dev->class_dev, "invalid digital trigger channel\n"); return -EINVAL; } From 926234f1b8434c4409aa4c53637aa3362ca07cea Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Fri, 17 Jul 2020 15:52:56 +0100 Subject: [PATCH 12/34] staging: comedi: addi_apci_1564: check INSN_CONFIG_DIGITAL_TRIG shift The `INSN_CONFIG` comedi instruction with sub-instruction code `INSN_CONFIG_DIGITAL_TRIG` includes a base channel in `data[3]`. This is used as a right shift amount for other bitmask values without being checked. Shift amounts greater than or equal to 32 will result in undefined behavior. Add code to deal with this. Fixes: 1e15687ea472 ("staging: comedi: addi_apci_1564: add Change-of-State interrupt subdevice and required functions") Cc: #3.17+ Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20200717145257.112660-4-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- .../staging/comedi/drivers/addi_apci_1564.c | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/staging/comedi/drivers/addi_apci_1564.c b/drivers/staging/comedi/drivers/addi_apci_1564.c index 10501fe6bb25..1268ba34be5f 100644 --- a/drivers/staging/comedi/drivers/addi_apci_1564.c +++ b/drivers/staging/comedi/drivers/addi_apci_1564.c @@ -331,14 +331,22 @@ static int apci1564_cos_insn_config(struct comedi_device *dev, unsigned int *data) { struct apci1564_private *devpriv = dev->private; - unsigned int shift, oldmask; + unsigned int shift, oldmask, himask, lomask; switch (data[0]) { case INSN_CONFIG_DIGITAL_TRIG: if (data[1] != 0) return -EINVAL; shift = data[3]; - oldmask = (1U << shift) - 1; + if (shift < 32) { + oldmask = (1U << shift) - 1; + himask = data[4] << shift; + lomask = data[5] << shift; + } else { + oldmask = 0xffffffffu; + himask = 0; + lomask = 0; + } switch (data[2]) { case COMEDI_DIGITAL_TRIG_DISABLE: devpriv->ctrl = 0; @@ -362,8 +370,8 @@ static int apci1564_cos_insn_config(struct comedi_device *dev, devpriv->mode2 &= oldmask; } /* configure specified channels */ - devpriv->mode1 |= data[4] << shift; - devpriv->mode2 |= data[5] << shift; + devpriv->mode1 |= himask; + devpriv->mode2 |= lomask; break; case COMEDI_DIGITAL_TRIG_ENABLE_LEVELS: if (devpriv->ctrl != (APCI1564_DI_IRQ_ENA | @@ -380,8 +388,8 @@ static int apci1564_cos_insn_config(struct comedi_device *dev, devpriv->mode2 &= oldmask; } /* configure specified channels */ - devpriv->mode1 |= data[4] << shift; - devpriv->mode2 |= data[5] << shift; + devpriv->mode1 |= himask; + devpriv->mode2 |= lomask; break; default: return -EINVAL; From 3f0dcfbcd2e162fc0a11c1f59b7acd42ee45f126 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 20 Jul 2020 10:54:35 +0800 Subject: [PATCH 13/34] scsi: core: Run queue in case of I/O resource contention failure I/O requests may be held in scheduler queue because of resource contention. The starvation scenario was handled properly in the regular completion path but we failed to account for it during I/O submission. This lead to the hang captured below. Make sure we run the queue when resource contention is encountered in the submission path. [ 39.054963] scsi 13:0:0:0: rejecting I/O to dead device [ 39.058700] scsi 13:0:0:0: rejecting I/O to dead device [ 39.087855] sd 13:0:0:1: [sdd] Synchronizing SCSI cache [ 39.088909] scsi 13:0:0:1: rejecting I/O to dead device [ 39.095351] scsi 13:0:0:1: rejecting I/O to dead device [ 39.096962] scsi 13:0:0:1: rejecting I/O to dead device [ 247.021859] INFO: task scsi-stress-rem:813 blocked for more than 122 seconds. [ 247.023258] Not tainted 5.8.0-rc2 #8 [ 247.024069] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 247.025331] scsi-stress-rem D 0 813 802 0x00004000 [ 247.025334] Call Trace: [ 247.025354] __schedule+0x504/0x55f [ 247.027987] schedule+0x72/0xa8 [ 247.027991] blk_mq_freeze_queue_wait+0x63/0x8c [ 247.027994] ? do_wait_intr_irq+0x7a/0x7a [ 247.027996] blk_cleanup_queue+0x4b/0xc9 [ 247.028000] __scsi_remove_device+0xf6/0x14e [ 247.028002] scsi_remove_device+0x21/0x2b [ 247.029037] sdev_store_delete+0x58/0x7c [ 247.029041] kernfs_fop_write+0x10d/0x14f [ 247.031281] vfs_write+0xa2/0xdf [ 247.032670] ksys_write+0x6b/0xb3 [ 247.032673] do_syscall_64+0x56/0x82 [ 247.034053] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 247.034059] RIP: 0033:0x7f69f39e9008 [ 247.036330] Code: Bad RIP value. [ 247.036331] RSP: 002b:00007ffdd8116498 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 247.037613] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f69f39e9008 [ 247.039714] RDX: 0000000000000002 RSI: 000055cde92a0ab0 RDI: 0000000000000001 [ 247.039715] RBP: 000055cde92a0ab0 R08: 000000000000000a R09: 00007f69f3a79e80 [ 247.039716] R10: 000000000000000a R11: 0000000000000246 R12: 00007f69f3abb780 [ 247.039717] R13: 0000000000000002 R14: 00007f69f3ab6740 R15: 0000000000000002 Link: https://lore.kernel.org/r/20200720025435.812030-1-ming.lei@redhat.com Cc: linux-block@vger.kernel.org Cc: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 0ba7a65e7c8d..06056e9ec333 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -547,6 +547,15 @@ static void scsi_mq_uninit_cmd(struct scsi_cmnd *cmd) scsi_uninit_cmd(cmd); } +static void scsi_run_queue_async(struct scsi_device *sdev) +{ + if (scsi_target(sdev)->single_lun || + !list_empty(&sdev->host->starved_list)) + kblockd_schedule_work(&sdev->requeue_work); + else + blk_mq_run_hw_queues(sdev->request_queue, true); +} + /* Returns false when no more bytes to process, true if there are more */ static bool scsi_end_request(struct request *req, blk_status_t error, unsigned int bytes) @@ -591,11 +600,7 @@ static bool scsi_end_request(struct request *req, blk_status_t error, __blk_mq_end_request(req, error); - if (scsi_target(sdev)->single_lun || - !list_empty(&sdev->host->starved_list)) - kblockd_schedule_work(&sdev->requeue_work); - else - blk_mq_run_hw_queues(q, true); + scsi_run_queue_async(sdev); percpu_ref_put(&q->q_usage_counter); return false; @@ -1702,6 +1707,7 @@ out_put_budget: */ if (req->rq_flags & RQF_DONTPREP) scsi_mq_uninit_cmd(cmd); + scsi_run_queue_async(sdev); break; } return ret; From b344d6a83d01c52fddbefa6b3b4764da5b1022a0 Mon Sep 17 00:00:00 2001 From: Liam Beguin Date: Sat, 18 Jul 2020 16:10:21 -0400 Subject: [PATCH 14/34] parisc: add support for cmpxchg on u8 pointers The kernel test bot reported[1] that using set_mask_bits on a u8 causes the following issue on parisc: hppa-linux-ld: drivers/phy/ti/phy-tusb1210.o: in function `tusb1210_probe': >> (.text+0x2f4): undefined reference to `__cmpxchg_called_with_bad_pointer' >> hppa-linux-ld: (.text+0x324): undefined reference to `__cmpxchg_called_with_bad_pointer' hppa-linux-ld: (.text+0x354): undefined reference to `__cmpxchg_called_with_bad_pointer' Add support for cmpxchg on u8 pointers. [1] https://lore.kernel.org/patchwork/patch/1272617/#1468946 Reported-by: kernel test robot Signed-off-by: Liam Beguin Tested-by: Dave Anglin Signed-off-by: Helge Deller --- arch/parisc/include/asm/cmpxchg.h | 2 ++ arch/parisc/lib/bitops.c | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/arch/parisc/include/asm/cmpxchg.h b/arch/parisc/include/asm/cmpxchg.h index ab5c215cf46c..068958575871 100644 --- a/arch/parisc/include/asm/cmpxchg.h +++ b/arch/parisc/include/asm/cmpxchg.h @@ -60,6 +60,7 @@ extern void __cmpxchg_called_with_bad_pointer(void); extern unsigned long __cmpxchg_u32(volatile unsigned int *m, unsigned int old, unsigned int new_); extern u64 __cmpxchg_u64(volatile u64 *ptr, u64 old, u64 new_); +extern u8 __cmpxchg_u8(volatile u8 *ptr, u8 old, u8 new_); /* don't worry...optimizer will get rid of most of this */ static inline unsigned long @@ -71,6 +72,7 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new_, int size) #endif case 4: return __cmpxchg_u32((unsigned int *)ptr, (unsigned int)old, (unsigned int)new_); + case 1: return __cmpxchg_u8((u8 *)ptr, (u8)old, (u8)new_); } __cmpxchg_called_with_bad_pointer(); return old; diff --git a/arch/parisc/lib/bitops.c b/arch/parisc/lib/bitops.c index 70ffbcf889b8..2e4d1f05a926 100644 --- a/arch/parisc/lib/bitops.c +++ b/arch/parisc/lib/bitops.c @@ -79,3 +79,15 @@ unsigned long __cmpxchg_u32(volatile unsigned int *ptr, unsigned int old, unsign _atomic_spin_unlock_irqrestore(ptr, flags); return (unsigned long)prev; } + +u8 __cmpxchg_u8(volatile u8 *ptr, u8 old, u8 new) +{ + unsigned long flags; + u8 prev; + + _atomic_spin_lock_irqsave(ptr, flags); + if ((prev = *ptr) == old) + *ptr = new; + _atomic_spin_unlock_irqrestore(ptr, flags); + return prev; +} From 5ce1a24dd98c00a57a8fa13660648abf7e08e3ef Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Fri, 10 Jul 2020 13:57:52 +0800 Subject: [PATCH 15/34] usb: xhci-mtk: fix the failure of bandwidth allocation The wMaxPacketSize field of endpoint descriptor may be zero as default value in alternate interface, and they are not actually selected when start stream, so skip them when try to allocate bandwidth. Cc: stable Fixes: 0cbd4b34cda9 ("xhci: mediatek: support MTK xHCI host controller") Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/1594360672-2076-1-git-send-email-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk-sch.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/host/xhci-mtk-sch.c b/drivers/usb/host/xhci-mtk-sch.c index fea555570ad4..45c54d56ecbd 100644 --- a/drivers/usb/host/xhci-mtk-sch.c +++ b/drivers/usb/host/xhci-mtk-sch.c @@ -557,6 +557,10 @@ static bool need_bw_sch(struct usb_host_endpoint *ep, if (is_fs_or_ls(speed) && !has_tt) return false; + /* skip endpoint with zero maxpkt */ + if (usb_endpoint_maxp(&ep->desc) == 0) + return false; + return true; } From dbb0897e805f2ab1b8bc358f6c3d878a376b8897 Mon Sep 17 00:00:00 2001 From: Forest Crossman Date: Fri, 17 Jul 2020 06:27:34 -0500 Subject: [PATCH 16/34] usb: xhci: Fix ASM2142/ASM3142 DMA addressing The ASM2142/ASM3142 (same PCI IDs) does not support full 64-bit DMA addresses, which can cause silent memory corruption or IOMMU errors on platforms that use the upper bits. Add the XHCI_NO_64BIT_SUPPORT quirk to fix this issue. Signed-off-by: Forest Crossman Cc: stable Link: https://lore.kernel.org/r/20200717112734.328432-1-cyrozap@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index ef513c2fb843..9234c82e70e4 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -265,6 +265,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && pdev->device == 0x1142) xhci->quirks |= XHCI_TRUST_TX_LENGTH; + if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && + pdev->device == 0x2142) + xhci->quirks |= XHCI_NO_64BIT_SUPPORT; if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && pdev->device == PCI_DEVICE_ID_ASMEDIA_1042A_XHCI) From be6577af0cef934ccb036445314072e8cb9217b9 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Tue, 21 Jul 2020 07:36:59 -0400 Subject: [PATCH 17/34] parisc: Add atomic64_set_release() define to avoid CPU soft lockups Stalls are quite frequent with recent kernels. I enabled CONFIG_SOFTLOCKUP_DETECTOR and I caught the following stall: watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [cc1:22803] CPU: 0 PID: 22803 Comm: cc1 Not tainted 5.6.17+ #3 Hardware name: 9000/800/rp3440 IAOQ[0]: d_alloc_parallel+0x384/0x688 IAOQ[1]: d_alloc_parallel+0x388/0x688 RP(r2): d_alloc_parallel+0x134/0x688 Backtrace: [<000000004036974c>] __lookup_slow+0xa4/0x200 [<0000000040369fc8>] walk_component+0x288/0x458 [<000000004036a9a0>] path_lookupat+0x88/0x198 [<000000004036e748>] filename_lookup+0xa0/0x168 [<000000004036e95c>] user_path_at_empty+0x64/0x80 [<000000004035d93c>] vfs_statx+0x104/0x158 [<000000004035dfcc>] __do_sys_lstat64+0x44/0x80 [<000000004035e5a0>] sys_lstat64+0x20/0x38 [<0000000040180054>] syscall_exit+0x0/0x14 The code was stuck in this loop in d_alloc_parallel: 4037d414: 0e 00 10 dc ldd 0(r16),ret0 4037d418: c7 fc 5f ed bb,< ret0,1f,4037d414 4037d41c: 08 00 02 40 nop This is the inner loop of bit_spin_lock which is called by hlist_bl_unlock in d_alloc_parallel: static inline void bit_spin_lock(int bitnum, unsigned long *addr) { /* * Assuming the lock is uncontended, this never enters * the body of the outer loop. If it is contended, then * within the inner loop a non-atomic test is used to * busywait with less bus contention for a good time to * attempt to acquire the lock bit. */ preempt_disable(); #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) while (unlikely(test_and_set_bit_lock(bitnum, addr))) { preempt_enable(); do { cpu_relax(); } while (test_bit(bitnum, addr)); preempt_disable(); } #endif __acquire(bitlock); } After consideration, I realized that we must be losing bit unlocks. Then, I noticed that we missed defining atomic64_set_release(). Adding this define fixes the stalls in bit operations. Signed-off-by: Dave Anglin Cc: stable@vger.kernel.org Signed-off-by: Helge Deller --- arch/parisc/include/asm/atomic.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 118953d41763..6dd4171c9530 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -212,6 +212,8 @@ atomic64_set(atomic64_t *v, s64 i) _atomic_spin_unlock_irqrestore(v, flags); } +#define atomic64_set_release(v, i) atomic64_set((v), (i)) + static __inline__ s64 atomic64_read(const atomic64_t *v) { From ce684552a266cb1c7cc2f7e623f38567adec6653 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 12 Jul 2020 20:10:12 +0900 Subject: [PATCH 18/34] vt: Reject zero-sized screen buffer size. syzbot is reporting general protection fault in do_con_write() [1] caused by vc->vc_screenbuf == ZERO_SIZE_PTR caused by vc->vc_screenbuf_size == 0 caused by vc->vc_cols == vc->vc_rows == vc->vc_size_row == 0 caused by fb_set_var() from ioctl(FBIOPUT_VSCREENINFO) on /dev/fb0 , for gotoxy(vc, 0, 0) from reset_terminal() from vc_init() from vc_allocate() from con_install() from tty_init_dev() from tty_open() on such console causes vc->vc_pos == 0x10000000e due to ((unsigned long) ZERO_SIZE_PTR) + -1U * 0 + (-1U << 1). I don't think that a console with 0 column or 0 row makes sense. And it seems that vc_do_resize() does not intend to allow resizing a console to 0 column or 0 row due to new_cols = (cols ? cols : vc->vc_cols); new_rows = (lines ? lines : vc->vc_rows); exception. Theoretically, cols and rows can be any range as long as 0 < cols * rows * 2 <= KMALLOC_MAX_SIZE is satisfied (e.g. cols == 1048576 && rows == 2 is possible) because of vc->vc_size_row = vc->vc_cols << 1; vc->vc_screenbuf_size = vc->vc_rows * vc->vc_size_row; in visual_init() and kzalloc(vc->vc_screenbuf_size) in vc_allocate(). Since we can detect cols == 0 or rows == 0 via screenbuf_size = 0 in visual_init(), we can reject kzalloc(0). Then, vc_allocate() will return an error, and con_write() will not be called on a console with 0 column or 0 row. We need to make sure that integer overflow in visual_init() won't happen. Since vc_do_resize() restricts cols <= 32767 and rows <= 32767, applying 1 <= cols <= 32767 and 1 <= rows <= 32767 restrictions to vc_allocate() will be practically fine. This patch does not touch con_init(), for returning -EINVAL there does not help when we are not returning -ENOMEM. [1] https://syzkaller.appspot.com/bug?extid=017265e8553724e514e8 Reported-and-tested-by: syzbot Signed-off-by: Tetsuo Handa Cc: stable Link: https://lore.kernel.org/r/20200712111013.11881-1-penguin-kernel@I-love.SAKURA.ne.jp Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 48a8199f7845..42d8c67a481f 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -1092,10 +1092,19 @@ static const struct tty_port_operations vc_port_ops = { .destruct = vc_port_destruct, }; +/* + * Change # of rows and columns (0 means unchanged/the size of fg_console) + * [this is to be used together with some user program + * like resize that changes the hardware videomode] + */ +#define VC_MAXCOL (32767) +#define VC_MAXROW (32767) + int vc_allocate(unsigned int currcons) /* return 0 on success */ { struct vt_notifier_param param; struct vc_data *vc; + int err; WARN_CONSOLE_UNLOCKED(); @@ -1125,6 +1134,11 @@ int vc_allocate(unsigned int currcons) /* return 0 on success */ if (!*vc->vc_uni_pagedir_loc) con_set_default_unimap(vc); + err = -EINVAL; + if (vc->vc_cols > VC_MAXCOL || vc->vc_rows > VC_MAXROW || + vc->vc_screenbuf_size > KMALLOC_MAX_SIZE || !vc->vc_screenbuf_size) + goto err_free; + err = -ENOMEM; vc->vc_screenbuf = kzalloc(vc->vc_screenbuf_size, GFP_KERNEL); if (!vc->vc_screenbuf) goto err_free; @@ -1143,7 +1157,7 @@ err_free: visual_deinit(vc); kfree(vc); vc_cons[currcons].d = NULL; - return -ENOMEM; + return err; } static inline int resize_screen(struct vc_data *vc, int width, int height, @@ -1158,14 +1172,6 @@ static inline int resize_screen(struct vc_data *vc, int width, int height, return err; } -/* - * Change # of rows and columns (0 means unchanged/the size of fg_console) - * [this is to be used together with some user program - * like resize that changes the hardware videomode] - */ -#define VC_RESIZE_MAXCOL (32767) -#define VC_RESIZE_MAXROW (32767) - /** * vc_do_resize - resizing method for the tty * @tty: tty being resized @@ -1201,7 +1207,7 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc, user = vc->vc_resize_user; vc->vc_resize_user = 0; - if (cols > VC_RESIZE_MAXCOL || lines > VC_RESIZE_MAXROW) + if (cols > VC_MAXCOL || lines > VC_MAXROW) return -EINVAL; new_cols = (cols ? cols : vc->vc_cols); @@ -1212,7 +1218,7 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc, if (new_cols == vc->vc_cols && new_rows == vc->vc_rows) return 0; - if (new_screen_size > KMALLOC_MAX_SIZE) + if (new_screen_size > KMALLOC_MAX_SIZE || !new_screen_size) return -EINVAL; newscreen = kzalloc(new_screen_size, GFP_USER); if (!newscreen) @@ -3393,6 +3399,7 @@ static int __init con_init(void) INIT_WORK(&vc_cons[currcons].SAK_work, vc_SAK); tty_port_init(&vc->port); visual_init(vc, currcons, 1); + /* Assuming vc->vc_{cols,rows,screenbuf_size} are sane here. */ vc->vc_screenbuf = kzalloc(vc->vc_screenbuf_size, GFP_NOWAIT); vc_init(vc, vc->vc_rows, vc->vc_cols, currcons || !vc->vc_sw->con_save_screen); From 22a82fa7d6c3e16d56a036b1fa697a39b954adf0 Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Mon, 13 Jul 2020 09:32:28 +0200 Subject: [PATCH 19/34] tty: xilinx_uartps: Really fix id assignment The problems started with the revert (18cc7ac8a28e28). The cdns_uart_console.index is statically assigned -1. When the port is registered, Linux assigns consecutive numbers to it. It turned out that when using ttyPS1 as console, the index is not updated as we are reusing the same cdns_uart_console instance for multiple ports. When registering ttyPS0, it gets updated from -1 to 0, but when registering ttyPS1, it already is 0 and not updated. That led to 2ae11c46d5fdc4. It assigns the index prior to registering the uart_driver once. Unfortunately, that ended up breaking the situation where the probe order does not match the id order. When using the same device tree for both uboot and linux, it is important that the serial0 alias points to the console. So some boards reverse those aliases. This was reported by Jan Kiszka. The proposed fix was reverting the index assignment and going back to the previous iteration. However such a reversed assignement (serial0 -> uart1, serial1 -> uart0) was already partially broken by the revert (18cc7ac8a28e28). While the ttyPS device works, the kmsg connection is already broken and kernel messages go missing. Reverting the id assignment does not fix this. >From the xilinx_uartps driver pov (after reverting the refactoring commits), there can be only one console. This manifests in static variables console_pprt and cdns_uart_console. These variables are not properly linked and can go out of sync. The cdns_uart_console.index is important for uart_add_one_port. We call that function for each port - one of which hopefully is the console. If it isn't, the CON_ENABLED flag is not set and console_port is cleared. The next cdns_uart_probe call then tries to register the next port using that same cdns_uart_console. It is important that console_port and cdns_uart_console (and its index in particular) stay in sync. The index assignment implemented by Shubhrajyoti Datta is correct in principle. It just may have to happen a second time if the first cdns_uart_probe call didn't encounter the console device. And we shouldn't change the index once the console uart is registered. Reported-by: Shubhrajyoti Datta Reported-by: Jan Kiszka Link: https://lore.kernel.org/linux-serial/f4092727-d8f5-5f91-2c9f-76643aace993@siemens.com/ Fixes: 18cc7ac8a28e28 ("Revert "serial: uartps: Register own uart console and driver structures"") Fixes: 2ae11c46d5fdc4 ("tty: xilinx_uartps: Fix missing id assignment to the console") Fixes: 76ed2e10579671 ("Revert "tty: xilinx_uartps: Fix missing id assignment to the console"") Signed-off-by: Helmut Grohne Cc: stable Link: https://lore.kernel.org/r/20200713073227.GA3805@laureti-dev Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/xilinx_uartps.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index 672cfa075e28..2833f1418d6d 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -1580,8 +1580,10 @@ static int cdns_uart_probe(struct platform_device *pdev) * If register_console() don't assign value, then console_port pointer * is cleanup. */ - if (!console_port) + if (!console_port) { + cdns_uart_console.index = id; console_port = port; + } #endif rc = uart_add_one_port(&cdns_uart_uart_driver, port); @@ -1594,8 +1596,10 @@ static int cdns_uart_probe(struct platform_device *pdev) #ifdef CONFIG_SERIAL_XILINX_PS_UART_CONSOLE /* This is not port which is used for console that's why clean it up */ if (console_port == port && - !(cdns_uart_uart_driver.cons->flags & CON_ENABLED)) + !(cdns_uart_uart_driver.cons->flags & CON_ENABLED)) { console_port = NULL; + cdns_uart_console.index = -1; + } #endif cdns_uart_data->cts_override = of_property_read_bool(pdev->dev.of_node, From b374c562ee7ab3f3a1daf959c01868bae761571c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 10 Jul 2020 15:59:46 +0200 Subject: [PATCH 20/34] serial: tegra: fix CREAD handling for PIO Commit 33ae787b74fc ("serial: tegra: add support to ignore read") added support for dropping input in case CREAD isn't set, but for PIO the ignore_status_mask wasn't checked until after the character had been put in the receive buffer. Note that the NULL tty-port test is bogus and will be removed by a follow-on patch. Fixes: 33ae787b74fc ("serial: tegra: add support to ignore read") Cc: stable # 5.4 Cc: Shardar Shariff Md Cc: Krishna Yarlagadda Signed-off-by: Johan Hovold Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20200710135947.2737-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial-tegra.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/serial-tegra.c b/drivers/tty/serial/serial-tegra.c index 8de8bac9c6c7..b3bbee6b6702 100644 --- a/drivers/tty/serial/serial-tegra.c +++ b/drivers/tty/serial/serial-tegra.c @@ -653,11 +653,14 @@ static void tegra_uart_handle_rx_pio(struct tegra_uart_port *tup, ch = (unsigned char) tegra_uart_read(tup, UART_RX); tup->uport.icount.rx++; - if (!uart_handle_sysrq_char(&tup->uport, ch) && tty) - tty_insert_flip_char(tty, ch, flag); + if (uart_handle_sysrq_char(&tup->uport, ch)) + continue; if (tup->uport.ignore_status_mask & UART_LSR_DR) continue; + + if (tty) + tty_insert_flip_char(tty, ch, flag); } while (1); } From 707631ce639651e51bfed9e56326cde86f9e97b8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 10 Jul 2020 15:59:47 +0200 Subject: [PATCH 21/34] serial: tegra: drop bogus NULL tty-port checks The struct tty_port is part of the uart state and will never be NULL in the receive helpers. Drop the bogus NULL checks and rename the pointer-variables "port" to differentiate them from struct tty_struct pointers (which can be NULL). Fixes: 962963e4ee23 ("serial: tegra: Switch to using struct tty_port") Signed-off-by: Johan Hovold Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20200710135947.2737-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial-tegra.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/tty/serial/serial-tegra.c b/drivers/tty/serial/serial-tegra.c index b3bbee6b6702..04d1b0807e66 100644 --- a/drivers/tty/serial/serial-tegra.c +++ b/drivers/tty/serial/serial-tegra.c @@ -635,7 +635,7 @@ static void tegra_uart_handle_tx_pio(struct tegra_uart_port *tup) } static void tegra_uart_handle_rx_pio(struct tegra_uart_port *tup, - struct tty_port *tty) + struct tty_port *port) { do { char flag = TTY_NORMAL; @@ -659,13 +659,12 @@ static void tegra_uart_handle_rx_pio(struct tegra_uart_port *tup, if (tup->uport.ignore_status_mask & UART_LSR_DR) continue; - if (tty) - tty_insert_flip_char(tty, ch, flag); + tty_insert_flip_char(port, ch, flag); } while (1); } static void tegra_uart_copy_rx_to_tty(struct tegra_uart_port *tup, - struct tty_port *tty, + struct tty_port *port, unsigned int count) { int copied; @@ -675,17 +674,13 @@ static void tegra_uart_copy_rx_to_tty(struct tegra_uart_port *tup, return; tup->uport.icount.rx += count; - if (!tty) { - dev_err(tup->uport.dev, "No tty port\n"); - return; - } if (tup->uport.ignore_status_mask & UART_LSR_DR) return; dma_sync_single_for_cpu(tup->uport.dev, tup->rx_dma_buf_phys, count, DMA_FROM_DEVICE); - copied = tty_insert_flip_string(tty, + copied = tty_insert_flip_string(port, ((unsigned char *)(tup->rx_dma_buf_virt)), count); if (copied != count) { WARN_ON(1); From f4c23a140d80ef5e6d3d1f8f57007649014b60fa Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 21 Jul 2020 14:38:52 +0000 Subject: [PATCH 22/34] serial: 8250: fix null-ptr-deref in serial8250_start_tx() I got null-ptr-deref in serial8250_start_tx(): [ 78.114630] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 [ 78.123778] Mem abort info: [ 78.126560] ESR = 0x86000007 [ 78.129603] EC = 0x21: IABT (current EL), IL = 32 bits [ 78.134891] SET = 0, FnV = 0 [ 78.137933] EA = 0, S1PTW = 0 [ 78.141064] user pgtable: 64k pages, 48-bit VAs, pgdp=00000027d41a8600 [ 78.147562] [0000000000000000] pgd=00000027893f0003, p4d=00000027893f0003, pud=00000027893f0003, pmd=00000027c9a20003, pte=0000000000000000 [ 78.160029] Internal error: Oops: 86000007 [#1] SMP [ 78.164886] Modules linked in: sunrpc vfat fat aes_ce_blk crypto_simd cryptd aes_ce_cipher crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce ses enclosure sg sbsa_gwdt ipmi_ssif spi_dw_mmio sch_fq_codel vhost_net tun vhost vhost_iotlb tap ip_tables ext4 mbcache jbd2 ahci hisi_sas_v3_hw libahci hisi_sas_main libsas hns3 scsi_transport_sas hclge libata megaraid_sas ipmi_si hnae3 ipmi_devintf ipmi_msghandler br_netfilter bridge stp llc nvme nvme_core xt_sctp sctp libcrc32c dm_mod nbd [ 78.207383] CPU: 11 PID: 23258 Comm: null-ptr Not tainted 5.8.0-rc6+ #48 [ 78.214056] Hardware name: Huawei TaiShan 2280 V2/BC82AMDC, BIOS 2280-V2 CS V3.B210.01 03/12/2020 [ 78.222888] pstate: 80400089 (Nzcv daIf +PAN -UAO BTYPE=--) [ 78.228435] pc : 0x0 [ 78.230618] lr : serial8250_start_tx+0x160/0x260 [ 78.235215] sp : ffff800062eefb80 [ 78.238517] x29: ffff800062eefb80 x28: 0000000000000fff [ 78.243807] x27: ffff800062eefd80 x26: ffff202fd83b3000 [ 78.249098] x25: ffff800062eefd80 x24: ffff202fd83b3000 [ 78.254388] x23: ffff002fc5e50be8 x22: 0000000000000002 [ 78.259679] x21: 0000000000000001 x20: 0000000000000000 [ 78.264969] x19: ffffa688827eecc8 x18: 0000000000000000 [ 78.270259] x17: 0000000000000000 x16: 0000000000000000 [ 78.275550] x15: ffffa68881bc67a8 x14: 00000000000002e6 [ 78.280841] x13: ffffa68881bc67a8 x12: 000000000000c539 [ 78.286131] x11: d37a6f4de9bd37a7 x10: ffffa68881cccff0 [ 78.291421] x9 : ffffa68881bc6000 x8 : ffffa688819daa88 [ 78.296711] x7 : ffffa688822a0f20 x6 : ffffa688819e0000 [ 78.302002] x5 : ffff800062eef9d0 x4 : ffffa68881e707a8 [ 78.307292] x3 : 0000000000000000 x2 : 0000000000000002 [ 78.312582] x1 : 0000000000000001 x0 : ffffa688827eecc8 [ 78.317873] Call trace: [ 78.320312] 0x0 [ 78.322147] __uart_start.isra.9+0x64/0x78 [ 78.326229] uart_start+0xb8/0x1c8 [ 78.329620] uart_flush_chars+0x24/0x30 [ 78.333442] n_tty_receive_buf_common+0x7b0/0xc30 [ 78.338128] n_tty_receive_buf+0x44/0x2c8 [ 78.342122] tty_ioctl+0x348/0x11f8 [ 78.345599] ksys_ioctl+0xd8/0xf8 [ 78.348903] __arm64_sys_ioctl+0x2c/0xc8 [ 78.352812] el0_svc_common.constprop.2+0x88/0x1b0 [ 78.357583] do_el0_svc+0x44/0xd0 [ 78.360887] el0_sync_handler+0x14c/0x1d0 [ 78.364880] el0_sync+0x140/0x180 [ 78.368185] Code: bad PC value SERIAL_PORT_DFNS is not defined on each arch, if it's not defined, serial8250_set_defaults() won't be called in serial8250_isa_init_ports(), so the p->serial_in pointer won't be initialized, and it leads a null-ptr-deref. Fix this problem by calling serial8250_set_defaults() after init uart port. Signed-off-by: Yang Yingliang Cc: stable Link: https://lore.kernel.org/r/20200721143852.4058352-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index fc118f649887..cae61d1ebec5 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -524,6 +524,7 @@ static void __init serial8250_isa_init_ports(void) */ up->mcr_mask = ~ALPHA_KLUDGE_MCR; up->mcr_force = ALPHA_KLUDGE_MCR; + serial8250_set_defaults(up); } /* chain base port ops to support Remote Supervisor Adapter */ @@ -547,7 +548,6 @@ static void __init serial8250_isa_init_ports(void) port->membase = old_serial_port[i].iomem_base; port->iotype = old_serial_port[i].io_type; port->regshift = old_serial_port[i].iomem_reg_shift; - serial8250_set_defaults(up); port->irqflags |= irqflag; if (serial8250_isa_config != NULL) From 551e553f0d4ab623e2a6f424ab5834f9c7b5229c Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Tue, 14 Jul 2020 15:41:12 +0300 Subject: [PATCH 23/34] serial: 8250_mtk: Fix high-speed baud rates clamping Commit 7b668c064ec3 ("serial: 8250: Fix max baud limit in generic 8250 port") fixed limits of a baud rate setting for a generic 8250 port. In other words since that commit the baud rate has been permitted to be within [uartclk / 16 / UART_DIV_MAX; uartclk / 16], which is absolutely normal for a standard 8250 UART port. But there are custom 8250 ports, which provide extended baud rate limits. In particular the Mediatek 8250 port can work with baud rates up to "uartclk" speed. Normally that and any other peculiarity is supposed to be handled in a custom set_termios() callback implemented in the vendor-specific 8250-port glue-driver. Currently that is how it's done for the most of the vendor-specific 8250 ports, but for some reason for Mediatek a solution has been spread out to both the glue-driver and to the generic 8250-port code. Due to that a bug has been introduced, which permitted the extended baud rate limit for all even for standard 8250-ports. The bug has been fixed by the commit 7b668c064ec3 ("serial: 8250: Fix max baud limit in generic 8250 port") by narrowing the baud rates limit back down to the normal bounds. Unfortunately by doing so we also broke the Mediatek-specific extended bauds feature. A fix of the problem described above is twofold. First since we can't get back the extended baud rate limits feature to the generic set_termios() function and that method supports only a standard baud rates range, the requested baud rate must be locally stored before calling it and then restored back to the new termios structure after the generic set_termios() finished its magic business. By doing so we still use the serial8250_do_set_termios() method to set the LCR/MCR/FCR/etc. registers, while the extended baud rate setting procedure will be performed later in the custom Mediatek-specific set_termios() callback. Second since a true baud rate is now fully calculated in the custom set_termios() method we need to locally update the port timeout by calling the uart_update_timeout() function. After the fixes described above are implemented in the 8250_mtk.c driver, the Mediatek 8250-port should get back to normally working with extended baud rates. Link: https://lore.kernel.org/linux-serial/20200701211337.3027448-1-danielwinkler@google.com Fixes: 7b668c064ec3 ("serial: 8250: Fix max baud limit in generic 8250 port") Reported-by: Daniel Winkler Signed-off-by: Serge Semin Cc: stable Tested-by: Claire Chang Link: https://lore.kernel.org/r/20200714124113.20918-1-Sergey.Semin@baikalelectronics.ru Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_mtk.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c index f839380c2f4c..98b8a3e30733 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -306,8 +306,21 @@ mtk8250_set_termios(struct uart_port *port, struct ktermios *termios, } #endif + /* + * Store the requested baud rate before calling the generic 8250 + * set_termios method. Standard 8250 port expects bauds to be + * no higher than (uartclk / 16) so the baud will be clamped if it + * gets out of that bound. Mediatek 8250 port supports speed + * higher than that, therefore we'll get original baud rate back + * after calling the generic set_termios method and recalculate + * the speed later in this method. + */ + baud = tty_termios_baud_rate(termios); + serial8250_do_set_termios(port, termios, old); + tty_termios_encode_baud_rate(termios, baud, baud); + /* * Mediatek UARTs use an extra highspeed register (MTK_UART_HIGHS) * @@ -339,6 +352,11 @@ mtk8250_set_termios(struct uart_port *port, struct ktermios *termios, */ spin_lock_irqsave(&port->lock, flags); + /* + * Update the per-port timeout. + */ + uart_update_timeout(port, termios->c_cflag, baud); + /* set DLAB we have cval saved in up->lcr from the call to the core */ serial_port_out(port, UART_LCR, up->lcr | UART_LCR_DLAB); serial_dl_write(up, quot); From faaff9765664009c1c7c65551d32e9ed3b1dda8f Mon Sep 17 00:00:00 2001 From: Rustam Kovhaev Date: Wed, 22 Jul 2020 09:10:52 -0700 Subject: [PATCH 24/34] staging: wlan-ng: properly check endpoint types As syzkaller detected, wlan-ng driver does not do sanity check of endpoints in prism2sta_probe_usb(), add check for xfer direction and type Reported-and-tested-by: syzbot+c2a1fa67c02faa0de723@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?extid=c2a1fa67c02faa0de723 Signed-off-by: Rustam Kovhaev Cc: stable Link: https://lore.kernel.org/r/20200722161052.999754-1-rkovhaev@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wlan-ng/prism2usb.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/staging/wlan-ng/prism2usb.c b/drivers/staging/wlan-ng/prism2usb.c index 4689b2170e4f..456603fd26c0 100644 --- a/drivers/staging/wlan-ng/prism2usb.c +++ b/drivers/staging/wlan-ng/prism2usb.c @@ -61,11 +61,25 @@ static int prism2sta_probe_usb(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_device *dev; - + const struct usb_endpoint_descriptor *epd; + const struct usb_host_interface *iface_desc = interface->cur_altsetting; struct wlandevice *wlandev = NULL; struct hfa384x *hw = NULL; int result = 0; + if (iface_desc->desc.bNumEndpoints != 2) { + result = -ENODEV; + goto failed; + } + + result = -EINVAL; + epd = &iface_desc->endpoint[1].desc; + if (!usb_endpoint_is_bulk_in(epd)) + goto failed; + epd = &iface_desc->endpoint[2].desc; + if (!usb_endpoint_is_bulk_out(epd)) + goto failed; + dev = interface_to_usbdev(interface); wlandev = create_wlan(); if (!wlandev) { From ca9b31f6bb9c6aa9b4e5f0792f39a97bbffb8c51 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 21 Jul 2020 10:31:23 -0700 Subject: [PATCH 25/34] Makefile: Fix GCC_TOOLCHAIN_DIR prefix for Clang cross compilation When CROSS_COMPILE is set (e.g. aarch64-linux-gnu-), if $(CROSS_COMPILE)elfedit is found at /usr/bin/aarch64-linux-gnu-elfedit, GCC_TOOLCHAIN_DIR will be set to /usr/bin/. --prefix= will be set to /usr/bin/ and Clang as of 11 will search for both $(prefix)aarch64-linux-gnu-$needle and $(prefix)$needle. GCC searchs for $(prefix)aarch64-linux-gnu/$version/$needle, $(prefix)aarch64-linux-gnu/$needle and $(prefix)$needle. In practice, $(prefix)aarch64-linux-gnu/$needle rarely contains executables. To better model how GCC's -B/--prefix takes in effect in practice, newer Clang (since https://github.com/llvm/llvm-project/commit/3452a0d8c17f7166f479706b293caf6ac76ffd90) only searches for $(prefix)$needle. Currently it will find /usr/bin/as instead of /usr/bin/aarch64-linux-gnu-as. Set --prefix= to $(GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE)) (/usr/bin/aarch64-linux-gnu-) so that newer Clang can find the appropriate cross compiling GNU as (when -no-integrated-as is in effect). Cc: stable@vger.kernel.org Reported-by: Nathan Chancellor Signed-off-by: Fangrui Song Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Tested-by: Nick Desaulniers Link: https://github.com/ClangBuiltLinux/linux/issues/1099 Reviewed-by: Nick Desaulniers Signed-off-by: Masahiro Yamada --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 676f1cfb1d56..9d9d4166c0be 100644 --- a/Makefile +++ b/Makefile @@ -567,7 +567,7 @@ ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),) ifneq ($(CROSS_COMPILE),) CLANG_FLAGS += --target=$(notdir $(CROSS_COMPILE:%-=%)) GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)elfedit)) -CLANG_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR) +CLANG_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE)) GCC_TOOLCHAIN := $(realpath $(GCC_TOOLCHAIN_DIR)/..) endif ifneq ($(GCC_TOOLCHAIN),) From f867c771f98891841c217fa8459244ed0dd28921 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 17 Jul 2020 00:12:15 +0900 Subject: [PATCH 26/34] binder: Don't use mmput() from shrinker function. syzbot is reporting that mmput() from shrinker function has a risk of deadlock [1], for delayed_uprobe_add() from update_ref_ctr() calls kzalloc(GFP_KERNEL) with delayed_uprobe_lock held, and uprobe_clear_state() from __mmput() also holds delayed_uprobe_lock. Commit a1b2289cef92ef0e ("android: binder: drop lru lock in isolate callback") replaced mmput() with mmput_async() in order to avoid sleeping with spinlock held. But this patch replaces mmput() with mmput_async() in order not to start __mmput() from shrinker context. [1] https://syzkaller.appspot.com/bug?id=bc9e7303f537c41b2b0cc2dfcea3fc42964c2d45 Reported-by: syzbot Reported-by: syzbot Signed-off-by: Tetsuo Handa Reviewed-by: Michal Hocko Acked-by: Todd Kjos Acked-by: Christian Brauner Cc: stable Link: https://lore.kernel.org/r/4ba9adb2-43f5-2de0-22de-f6075c1fab50@i-love.sakura.ne.jp Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 42c672f1584e..cbe6aa77d50d 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -947,7 +947,7 @@ enum lru_status binder_alloc_free_page(struct list_head *item, trace_binder_unmap_user_end(alloc, index); } mmap_read_unlock(mm); - mmput(mm); + mmput_async(mm); trace_binder_unmap_kernel_start(alloc, index); From b34e7e298d7a5ed76b3aa327c240c29f1ef6dd22 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 15 Jul 2020 23:05:53 -0700 Subject: [PATCH 27/34] /dev/mem: Add missing memory barriers for devmem_inode WRITE_ONCE() isn't the correct way to publish a pointer to a data structure, since it doesn't include a write memory barrier. Therefore other tasks may see that the pointer has been set but not see that the pointed-to memory has finished being initialized yet. Instead a primitive with "release" semantics is needed. Use smp_store_release() for this. The use of READ_ONCE() on the read side is still potentially correct if there's no control dependency, i.e. if all memory being "published" is transitively reachable via the pointer itself. But this pairing is somewhat confusing and error-prone. So just upgrade the read side to smp_load_acquire() so that it clearly pairs with smp_store_release(). Cc: Arnd Bergmann Cc: Ingo Molnar Cc: Kees Cook Cc: Matthew Wilcox Cc: Russell King Cc: Andrew Morton Fixes: 3234ac664a87 ("/dev/mem: Revoke mappings when a driver claims the region") Signed-off-by: Eric Biggers Cc: stable Acked-by: Dan Williams Link: https://lore.kernel.org/r/20200716060553.24618-1-ebiggers@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/char/mem.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 934c92dcb9ab..687d4af6945d 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -814,7 +814,8 @@ static struct inode *devmem_inode; #ifdef CONFIG_IO_STRICT_DEVMEM void revoke_devmem(struct resource *res) { - struct inode *inode = READ_ONCE(devmem_inode); + /* pairs with smp_store_release() in devmem_init_inode() */ + struct inode *inode = smp_load_acquire(&devmem_inode); /* * Check that the initialization has completed. Losing the race @@ -1028,8 +1029,11 @@ static int devmem_init_inode(void) return rc; } - /* publish /dev/mem initialized */ - WRITE_ONCE(devmem_inode, inode); + /* + * Publish /dev/mem initialized. + * Pairs with smp_load_acquire() in revoke_devmem(). + */ + smp_store_release(&devmem_inode, inode); return 0; } From 91b44981a2316e7b00574d32dec4fae356444dcf Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Thu, 23 Jul 2020 11:37:34 +0300 Subject: [PATCH 28/34] interconnect: Do not skip aggregation for disabled paths When an interconnect path is being disabled, currently we don't aggregate the requests for it afterwards. But the re-aggregation step shouldn't be skipped, as it may leave the nodes with outdated bandwidth data. This outdated data may actually keep the path still enabled and prevent the device from going into lower power states. Reported-by: Atul Dhudase Fixes: 7d374b209083 ("interconnect: Add helpers for enabling/disabling a path") Reviewed-by: Sibi Sankar Tested-by: Atul Dhudase Reviewed-by: Atul Dhudase Link: https://lore.kernel.org/r/20200721120740.3436-1-georgi.djakov@linaro.org Signed-off-by: Georgi Djakov Link: https://lore.kernel.org/r/20200723083735.5616-2-georgi.djakov@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/interconnect/core.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index e5f998744501..9e1ab701785c 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -243,6 +243,7 @@ static int aggregate_requests(struct icc_node *node) { struct icc_provider *p = node->provider; struct icc_req *r; + u32 avg_bw, peak_bw; node->avg_bw = 0; node->peak_bw = 0; @@ -251,9 +252,14 @@ static int aggregate_requests(struct icc_node *node) p->pre_aggregate(node); hlist_for_each_entry(r, &node->req_list, req_node) { - if (!r->enabled) - continue; - p->aggregate(node, r->tag, r->avg_bw, r->peak_bw, + if (r->enabled) { + avg_bw = r->avg_bw; + peak_bw = r->peak_bw; + } else { + avg_bw = 0; + peak_bw = 0; + } + p->aggregate(node, r->tag, avg_bw, peak_bw, &node->avg_bw, &node->peak_bw); } From 92d232d176041db5b033dd7b7f7f2cf343f82237 Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Thu, 23 Jul 2020 11:37:35 +0300 Subject: [PATCH 29/34] interconnect: msm8916: Fix buswidth of pcnoc_s nodes The buswidth of the pcnoc_s_* nodes is actually not 8, but 4 bytes. Let's fix it. Reported-by: Jun Nie Reviewed-by: Mike Tipton Fixes: 30c8fa3ec61a ("interconnect: qcom: Add MSM8916 interconnect provider driver") Link: https://lore.kernel.org/r/20200709130004.12462-1-georgi.djakov@linaro.org Signed-off-by: Georgi Djakov Cc: stable Link: https://lore.kernel.org/r/20200723083735.5616-3-georgi.djakov@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/interconnect/qcom/msm8916.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/interconnect/qcom/msm8916.c b/drivers/interconnect/qcom/msm8916.c index e94f3c5228b7..42c6c5581662 100644 --- a/drivers/interconnect/qcom/msm8916.c +++ b/drivers/interconnect/qcom/msm8916.c @@ -197,13 +197,13 @@ DEFINE_QNODE(pcnoc_int_0, MSM8916_PNOC_INT_0, 8, -1, -1, MSM8916_PNOC_SNOC_MAS, DEFINE_QNODE(pcnoc_int_1, MSM8916_PNOC_INT_1, 8, -1, -1, MSM8916_PNOC_SNOC_MAS); DEFINE_QNODE(pcnoc_m_0, MSM8916_PNOC_MAS_0, 8, -1, -1, MSM8916_PNOC_INT_0); DEFINE_QNODE(pcnoc_m_1, MSM8916_PNOC_MAS_1, 8, -1, -1, MSM8916_PNOC_SNOC_MAS); -DEFINE_QNODE(pcnoc_s_0, MSM8916_PNOC_SLV_0, 8, -1, -1, MSM8916_SLAVE_CLK_CTL, MSM8916_SLAVE_TLMM, MSM8916_SLAVE_TCSR, MSM8916_SLAVE_SECURITY, MSM8916_SLAVE_MSS); -DEFINE_QNODE(pcnoc_s_1, MSM8916_PNOC_SLV_1, 8, -1, -1, MSM8916_SLAVE_IMEM_CFG, MSM8916_SLAVE_CRYPTO_0_CFG, MSM8916_SLAVE_MSG_RAM, MSM8916_SLAVE_PDM, MSM8916_SLAVE_PRNG); -DEFINE_QNODE(pcnoc_s_2, MSM8916_PNOC_SLV_2, 8, -1, -1, MSM8916_SLAVE_SPDM, MSM8916_SLAVE_BOOT_ROM, MSM8916_SLAVE_BIMC_CFG, MSM8916_SLAVE_PNOC_CFG, MSM8916_SLAVE_PMIC_ARB); -DEFINE_QNODE(pcnoc_s_3, MSM8916_PNOC_SLV_3, 8, -1, -1, MSM8916_SLAVE_MPM, MSM8916_SLAVE_SNOC_CFG, MSM8916_SLAVE_RBCPR_CFG, MSM8916_SLAVE_QDSS_CFG, MSM8916_SLAVE_DEHR_CFG); -DEFINE_QNODE(pcnoc_s_4, MSM8916_PNOC_SLV_4, 8, -1, -1, MSM8916_SLAVE_VENUS_CFG, MSM8916_SLAVE_CAMERA_CFG, MSM8916_SLAVE_DISPLAY_CFG); -DEFINE_QNODE(pcnoc_s_8, MSM8916_PNOC_SLV_8, 8, -1, -1, MSM8916_SLAVE_USB_HS, MSM8916_SLAVE_SDCC_1, MSM8916_SLAVE_BLSP_1); -DEFINE_QNODE(pcnoc_s_9, MSM8916_PNOC_SLV_9, 8, -1, -1, MSM8916_SLAVE_SDCC_2, MSM8916_SLAVE_LPASS, MSM8916_SLAVE_GRAPHICS_3D_CFG); +DEFINE_QNODE(pcnoc_s_0, MSM8916_PNOC_SLV_0, 4, -1, -1, MSM8916_SLAVE_CLK_CTL, MSM8916_SLAVE_TLMM, MSM8916_SLAVE_TCSR, MSM8916_SLAVE_SECURITY, MSM8916_SLAVE_MSS); +DEFINE_QNODE(pcnoc_s_1, MSM8916_PNOC_SLV_1, 4, -1, -1, MSM8916_SLAVE_IMEM_CFG, MSM8916_SLAVE_CRYPTO_0_CFG, MSM8916_SLAVE_MSG_RAM, MSM8916_SLAVE_PDM, MSM8916_SLAVE_PRNG); +DEFINE_QNODE(pcnoc_s_2, MSM8916_PNOC_SLV_2, 4, -1, -1, MSM8916_SLAVE_SPDM, MSM8916_SLAVE_BOOT_ROM, MSM8916_SLAVE_BIMC_CFG, MSM8916_SLAVE_PNOC_CFG, MSM8916_SLAVE_PMIC_ARB); +DEFINE_QNODE(pcnoc_s_3, MSM8916_PNOC_SLV_3, 4, -1, -1, MSM8916_SLAVE_MPM, MSM8916_SLAVE_SNOC_CFG, MSM8916_SLAVE_RBCPR_CFG, MSM8916_SLAVE_QDSS_CFG, MSM8916_SLAVE_DEHR_CFG); +DEFINE_QNODE(pcnoc_s_4, MSM8916_PNOC_SLV_4, 4, -1, -1, MSM8916_SLAVE_VENUS_CFG, MSM8916_SLAVE_CAMERA_CFG, MSM8916_SLAVE_DISPLAY_CFG); +DEFINE_QNODE(pcnoc_s_8, MSM8916_PNOC_SLV_8, 4, -1, -1, MSM8916_SLAVE_USB_HS, MSM8916_SLAVE_SDCC_1, MSM8916_SLAVE_BLSP_1); +DEFINE_QNODE(pcnoc_s_9, MSM8916_PNOC_SLV_9, 4, -1, -1, MSM8916_SLAVE_SDCC_2, MSM8916_SLAVE_LPASS, MSM8916_SLAVE_GRAPHICS_3D_CFG); DEFINE_QNODE(pcnoc_snoc_mas, MSM8916_PNOC_SNOC_MAS, 8, 29, -1, MSM8916_PNOC_SNOC_SLV); DEFINE_QNODE(pcnoc_snoc_slv, MSM8916_PNOC_SNOC_SLV, 8, -1, 45, MSM8916_SNOC_INT_0, MSM8916_SNOC_INT_BIMC, MSM8916_SNOC_INT_1); DEFINE_QNODE(qdss_int, MSM8916_SNOC_QDSS_INT, 8, -1, -1, MSM8916_SNOC_INT_0, MSM8916_SNOC_INT_BIMC); From 0b987032f8b58ef51cc8a042f46cc0cf1f277172 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Wed, 15 Jul 2020 12:38:42 +0100 Subject: [PATCH 30/34] usb: tegra: Fix allocation for the FPCI context Commit 5c4e8d3781bc ("usb: host: xhci-tegra: Add support for XUSB context save/restore") is using the IPFS 'num_offsets' value when allocating memory for FPCI context instead of the FPCI 'num_offsets'. After commit cad064f1bd52 ("devres: handle zero size in devm_kmalloc()") was added system suspend started failing on Tegra186. The kernel log showed that the Tegra XHCI driver was crashing on entry to suspend when attempting the save the USB context. On Tegra186, the IPFS context has a zero length but the FPCI content has a non-zero length, and because of the bug in the Tegra XHCI driver we are incorrectly allocating a zero length array for the FPCI context. The crash seen on entering suspend when we attempt to save the FPCI context and following commit cad064f1bd52 ("devres: handle zero size in devm_kmalloc()") this now causes a NULL pointer deference when we access the memory. Fix this by correcting the amount of memory we are allocating for FPCI contexts. Cc: stable@vger.kernel.org Fixes: 5c4e8d3781bc ("usb: host: xhci-tegra: Add support for XUSB context save/restore") Signed-off-by: Jon Hunter Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20200715113842.30680-1-jonathanh@nvidia.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c index 2eaf5c0af80c..ee6bf01775bb 100644 --- a/drivers/usb/host/xhci-tegra.c +++ b/drivers/usb/host/xhci-tegra.c @@ -856,7 +856,7 @@ static int tegra_xusb_init_context(struct tegra_xusb *tegra) if (!tegra->context.ipfs) return -ENOMEM; - tegra->context.fpci = devm_kcalloc(tegra->dev, soc->ipfs.num_offsets, + tegra->context.fpci = devm_kcalloc(tegra->dev, soc->fpci.num_offsets, sizeof(u32), GFP_KERNEL); if (!tegra->context.fpci) return -ENOMEM; From 033724d6864245a11f8e04c066002e6ad22b3fd0 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 15 Jul 2020 10:51:02 +0900 Subject: [PATCH 31/34] fbdev: Detect integer underflow at "struct fbcon_ops"->clear_margins. syzbot is reporting general protection fault in bitfill_aligned() [1] caused by integer underflow in bit_clear_margins(). The cause of this problem is when and how do_vc_resize() updates vc->vc_{cols,rows}. If vc_do_resize() fails (e.g. kzalloc() fails) when var.xres or var.yres is going to shrink, vc->vc_{cols,rows} will not be updated. This allows bit_clear_margins() to see info->var.xres < (vc->vc_cols * cw) or info->var.yres < (vc->vc_rows * ch). Unexpectedly large rw or bh will try to overrun the __iomem region and causes general protection fault. Also, vc_resize(vc, 0, 0) does not set vc->vc_{cols,rows} = 0 due to new_cols = (cols ? cols : vc->vc_cols); new_rows = (lines ? lines : vc->vc_rows); exception. Since cols and lines are calculated as cols = FBCON_SWAP(ops->rotate, info->var.xres, info->var.yres); rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres); cols /= vc->vc_font.width; rows /= vc->vc_font.height; vc_resize(vc, cols, rows); in fbcon_modechanged(), var.xres < vc->vc_font.width makes cols = 0 and var.yres < vc->vc_font.height makes rows = 0. This means that const int fd = open("/dev/fb0", O_ACCMODE); struct fb_var_screeninfo var = { }; ioctl(fd, FBIOGET_VSCREENINFO, &var); var.xres = var.yres = 1; ioctl(fd, FBIOPUT_VSCREENINFO, &var); easily reproduces integer underflow bug explained above. Of course, callers of vc_resize() are not handling vc_do_resize() failure is bad. But we can't avoid vc_resize(vc, 0, 0) which returns 0. Therefore, as a band-aid workaround, this patch checks integer underflow in "struct fbcon_ops"->clear_margins call, assuming that vc->vc_cols * vc->vc_font.width and vc->vc_rows * vc->vc_font.heigh do not cause integer overflow. [1] https://syzkaller.appspot.com/bug?id=a565882df74fa76f10d3a6fec4be31098dbb37c6 Reported-and-tested-by: syzbot Signed-off-by: Tetsuo Handa Acked-by: Daniel Vetter Cc: stable Link: https://lore.kernel.org/r/20200715015102.3814-1-penguin-kernel@I-love.SAKURA.ne.jp Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/core/bitblit.c | 4 ++-- drivers/video/fbdev/core/fbcon_ccw.c | 4 ++-- drivers/video/fbdev/core/fbcon_cw.c | 4 ++-- drivers/video/fbdev/core/fbcon_ud.c | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/video/fbdev/core/bitblit.c b/drivers/video/fbdev/core/bitblit.c index ca935c09a261..35ebeeccde4d 100644 --- a/drivers/video/fbdev/core/bitblit.c +++ b/drivers/video/fbdev/core/bitblit.c @@ -216,7 +216,7 @@ static void bit_clear_margins(struct vc_data *vc, struct fb_info *info, region.color = color; region.rop = ROP_COPY; - if (rw && !bottom_only) { + if ((int) rw > 0 && !bottom_only) { region.dx = info->var.xoffset + rs; region.dy = 0; region.width = rw; @@ -224,7 +224,7 @@ static void bit_clear_margins(struct vc_data *vc, struct fb_info *info, info->fbops->fb_fillrect(info, ®ion); } - if (bh) { + if ((int) bh > 0) { region.dx = info->var.xoffset; region.dy = info->var.yoffset + bs; region.width = rs; diff --git a/drivers/video/fbdev/core/fbcon_ccw.c b/drivers/video/fbdev/core/fbcon_ccw.c index dfa9a8aa4509..78f3a5621478 100644 --- a/drivers/video/fbdev/core/fbcon_ccw.c +++ b/drivers/video/fbdev/core/fbcon_ccw.c @@ -201,7 +201,7 @@ static void ccw_clear_margins(struct vc_data *vc, struct fb_info *info, region.color = color; region.rop = ROP_COPY; - if (rw && !bottom_only) { + if ((int) rw > 0 && !bottom_only) { region.dx = 0; region.dy = info->var.yoffset; region.height = rw; @@ -209,7 +209,7 @@ static void ccw_clear_margins(struct vc_data *vc, struct fb_info *info, info->fbops->fb_fillrect(info, ®ion); } - if (bh) { + if ((int) bh > 0) { region.dx = info->var.xoffset + bs; region.dy = 0; region.height = info->var.yres_virtual; diff --git a/drivers/video/fbdev/core/fbcon_cw.c b/drivers/video/fbdev/core/fbcon_cw.c index ce08251bfd38..fd098ff17574 100644 --- a/drivers/video/fbdev/core/fbcon_cw.c +++ b/drivers/video/fbdev/core/fbcon_cw.c @@ -184,7 +184,7 @@ static void cw_clear_margins(struct vc_data *vc, struct fb_info *info, region.color = color; region.rop = ROP_COPY; - if (rw && !bottom_only) { + if ((int) rw > 0 && !bottom_only) { region.dx = 0; region.dy = info->var.yoffset + rs; region.height = rw; @@ -192,7 +192,7 @@ static void cw_clear_margins(struct vc_data *vc, struct fb_info *info, info->fbops->fb_fillrect(info, ®ion); } - if (bh) { + if ((int) bh > 0) { region.dx = info->var.xoffset; region.dy = info->var.yoffset; region.height = info->var.yres; diff --git a/drivers/video/fbdev/core/fbcon_ud.c b/drivers/video/fbdev/core/fbcon_ud.c index 1936afc78fec..e165a3fad29a 100644 --- a/drivers/video/fbdev/core/fbcon_ud.c +++ b/drivers/video/fbdev/core/fbcon_ud.c @@ -231,7 +231,7 @@ static void ud_clear_margins(struct vc_data *vc, struct fb_info *info, region.color = color; region.rop = ROP_COPY; - if (rw && !bottom_only) { + if ((int) rw > 0 && !bottom_only) { region.dy = 0; region.dx = info->var.xoffset; region.width = rw; @@ -239,7 +239,7 @@ static void ud_clear_margins(struct vc_data *vc, struct fb_info *info, info->fbops->fb_fillrect(info, ®ion); } - if (bh) { + if ((int) bh > 0) { region.dy = info->var.yoffset; region.dx = info->var.xoffset; region.height = bh; From 29c4a54bc645c8b6745eeb58519e9ad794ceb419 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 16 Jul 2020 21:27:47 +0300 Subject: [PATCH 32/34] device property: Avoid NULL pointer dereference in device_get_next_child_node() When we have no primary fwnode or when it's a software node, we may end up in the situation when fwnode is a NULL pointer. There is no point to look for secondary fwnode in such case. Add a necessary check to a condition. Fixes: 114dbb4fa7c4 ("drivers property: When no children in primary, try secondary") Reported-by: Maxim Levitsky Signed-off-by: Andy Shevchenko Tested-by: Maxim Levitsky Link: https://lore.kernel.org/r/20200716182747.54929-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/property.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/property.c b/drivers/base/property.c index 1e6d75e65938..d58aa98fe964 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -721,7 +721,7 @@ struct fwnode_handle *device_get_next_child_node(struct device *dev, return next; /* When no more children in primary, continue with secondary */ - if (!IS_ERR_OR_NULL(fwnode->secondary)) + if (fwnode && !IS_ERR_OR_NULL(fwnode->secondary)) next = fwnode_get_next_child_node(fwnode->secondary, child); return next; From 5fdbe136ae19ab751daaa4d08d9a42f3e30d17f9 Mon Sep 17 00:00:00 2001 From: Matthew Howell Date: Wed, 22 Jul 2020 16:11:24 -0400 Subject: [PATCH 33/34] serial: exar: Fix GPIO configuration for Sealevel cards based on XR17V35X Sealevel XR17V35X based devices are inoperable on kernel versions 4.11 and above due to a change in the GPIO preconfiguration introduced in commit 7dea8165f1d. This patch fixes this by preconfiguring the GPIO on Sealevel cards to the value (0x00) used prior to commit 7dea8165f1d With GPIOs preconfigured as per commit 7dea8165f1d all ports on Sealevel XR17V35X based devices become stuck in high impedance mode, regardless of dip-switch or software configuration. This causes the device to become effectively unusable. This patch (in various forms) has been distributed to our customers and no issues related to it have been reported. Fixes: 7dea8165f1d6 ("serial: exar: Preconfigure xr17v35x MPIOs as output") Signed-off-by: Matthew Howell Link: https://lore.kernel.org/r/alpine.DEB.2.21.2007221605270.13247@tstest-VirtualBox Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_exar.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c index ddb6aeb76dc5..04b9af7ed941 100644 --- a/drivers/tty/serial/8250/8250_exar.c +++ b/drivers/tty/serial/8250/8250_exar.c @@ -326,7 +326,17 @@ static void setup_gpio(struct pci_dev *pcidev, u8 __iomem *p) * devices will export them as GPIOs, so we pre-configure them safely * as inputs. */ - u8 dir = pcidev->vendor == PCI_VENDOR_ID_EXAR ? 0xff : 0x00; + + u8 dir = 0x00; + + if ((pcidev->vendor == PCI_VENDOR_ID_EXAR) && + (pcidev->subsystem_vendor != PCI_VENDOR_ID_SEALEVEL)) { + // Configure GPIO as inputs for Commtech adapters + dir = 0xff; + } else { + // Configure GPIO as outputs for SeaLevel adapters + dir = 0x00; + } writeb(0x00, p + UART_EXAR_MPIOINT_7_0); writeb(0x00, p + UART_EXAR_MPIOLVL_7_0); From 92ed301919932f777713b9172e525674157e983d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 26 Jul 2020 14:14:06 -0700 Subject: [PATCH 34/34] Linux 5.8-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1bf740d91459..229e67f2ff75 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Kleptomaniac Octopus # *DOCUMENTATION*