From a1f32eeca60e74f6f103ce74f586570deddaef7a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 1 Jul 2019 13:26:28 -0700 Subject: [PATCH 001/277] f2fs: use generic checking and prep function for FS_IOC_SETFLAGS Make the f2fs implementation of FS_IOC_SETFLAGS use the new VFS helper function vfs_ioc_setflags_prepare(). This is based on a patch from Darrick Wong, but reworked to apply after commit 360985573b55 ("f2fs: separate f2fs i_flags from fs_flags and ext4 i_flags"). Originally-from: Darrick J. Wong Signed-off-by: Eric Biggers Reviewed-by: Chao Yu Reviewed-by: Darrick J. Wong Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f8d46df8fa9e..b7aa0144d874 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1770,7 +1770,8 @@ static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); - u32 fsflags; + struct f2fs_inode_info *fi = F2FS_I(inode); + u32 fsflags, old_fsflags; u32 iflags; int ret; @@ -1794,8 +1795,14 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) inode_lock(inode); + old_fsflags = f2fs_iflags_to_fsflags(fi->i_flags); + ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags); + if (ret) + goto out; + ret = f2fs_setflags_common(inode, iflags, f2fs_fsflags_to_iflags(F2FS_SETTABLE_FS_FL)); +out: inode_unlock(inode); mnt_drop_write_file(filp); return ret; From 6fc93c4e0ad12e34475755315749143013c702ab Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 1 Jul 2019 13:26:29 -0700 Subject: [PATCH 002/277] f2fs: use generic checking function for FS_IOC_FSSETXATTR Make the f2fs implementation of FS_IOC_FSSETXATTR use the new VFS helper function vfs_ioc_fssetxattr_check(), and remove the project quota check since it's now done by the helper function. This is based on a patch from Darrick Wong, but reworked to apply after commit 360985573b55 ("f2fs: separate f2fs i_flags from fs_flags and ext4 i_flags"). Originally-from: Darrick J. Wong Signed-off-by: Eric Biggers Reviewed-by: Chao Yu Reviewed-by: Darrick J. Wong Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 49 ++++++++++++++++--------------------------------- 1 file changed, 16 insertions(+), 33 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b7aa0144d874..717bad8d31b0 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2862,52 +2862,32 @@ static inline u32 f2fs_xflags_to_iflags(u32 xflags) return iflags; } +static void f2fs_fill_fsxattr(struct inode *inode, struct fsxattr *fa) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + + simple_fill_fsxattr(fa, f2fs_iflags_to_xflags(fi->i_flags)); + + if (f2fs_sb_has_project_quota(F2FS_I_SB(inode))) + fa->fsx_projid = from_kprojid(&init_user_ns, fi->i_projid); +} + static int f2fs_ioc_fsgetxattr(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); - struct f2fs_inode_info *fi = F2FS_I(inode); struct fsxattr fa; - memset(&fa, 0, sizeof(struct fsxattr)); - fa.fsx_xflags = f2fs_iflags_to_xflags(fi->i_flags); - - if (f2fs_sb_has_project_quota(F2FS_I_SB(inode))) - fa.fsx_projid = (__u32)from_kprojid(&init_user_ns, - fi->i_projid); + f2fs_fill_fsxattr(inode, &fa); if (copy_to_user((struct fsxattr __user *)arg, &fa, sizeof(fa))) return -EFAULT; return 0; } -static int f2fs_ioctl_check_project(struct inode *inode, struct fsxattr *fa) -{ - /* - * Project Quota ID state is only allowed to change from within the init - * namespace. Enforce that restriction only if we are trying to change - * the quota ID state. Everything else is allowed in user namespaces. - */ - if (current_user_ns() == &init_user_ns) - return 0; - - if (__kprojid_val(F2FS_I(inode)->i_projid) != fa->fsx_projid) - return -EINVAL; - - if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL) { - if (!(fa->fsx_xflags & FS_XFLAG_PROJINHERIT)) - return -EINVAL; - } else { - if (fa->fsx_xflags & FS_XFLAG_PROJINHERIT) - return -EINVAL; - } - - return 0; -} - static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); - struct fsxattr fa; + struct fsxattr fa, old_fa; u32 iflags; int err; @@ -2930,9 +2910,12 @@ static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg) return err; inode_lock(inode); - err = f2fs_ioctl_check_project(inode, &fa); + + f2fs_fill_fsxattr(inode, &old_fa); + err = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa); if (err) goto out; + err = f2fs_setflags_common(inode, iflags, f2fs_xflags_to_iflags(F2FS_SUPPORTED_XFLAGS)); if (err) From d5e5efa250cd644896f541551610fe7f0de35e0a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 1 Jul 2019 13:26:30 -0700 Subject: [PATCH 003/277] f2fs: remove redundant check from f2fs_setflags_common() Now that f2fs_ioc_setflags() and f2fs_ioc_fssetxattr() call the VFS helper functions which check for permission to change the immutable and append-only flags, it's no longer needed to do this check in f2fs_setflags_common() too. So remove it. This is based on a patch from Darrick Wong, but reworked to apply after commit 360985573b55 ("f2fs: separate f2fs i_flags from fs_flags and ext4 i_flags"). Originally-from: Darrick J. Wong Signed-off-by: Eric Biggers Reviewed-by: Chao Yu Reviewed-by: Darrick J. Wong Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 717bad8d31b0..3e58a6f697dd 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1653,19 +1653,12 @@ static int f2fs_file_flush(struct file *file, fl_owner_t id) static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) { struct f2fs_inode_info *fi = F2FS_I(inode); - u32 oldflags; /* Is it quota file? Do not allow user to mess with it */ if (IS_NOQUOTA(inode)) return -EPERM; - oldflags = fi->i_flags; - - if ((iflags ^ oldflags) & (F2FS_APPEND_FL | F2FS_IMMUTABLE_FL)) - if (!capable(CAP_LINUX_IMMUTABLE)) - return -EPERM; - - fi->i_flags = iflags | (oldflags & ~mask); + fi->i_flags = iflags | (fi->i_flags & ~mask); if (fi->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); From fc62113b32c95906b3ea8ba42e91014c7d0c6fa6 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 15 Jul 2019 18:00:14 +0800 Subject: [PATCH 004/277] mmc: host: sdhci-sprd: Fix the missing pm_runtime_put_noidle() When the SD host controller tries to probe again due to the derferred probe mechanism, it will always keep the SD host device as runtime resume state due to missing the runtime put operation in error path last time. Thus add the pm_runtime_put_noidle() in error path to make the PM runtime counter balance, which can make the SD host device's PM runtime work well. Signed-off-by: Baolin Wang Acked-by: Adrian Hunter Fixes: fb8bd90f83c4 ("mmc: sdhci-sprd: Add Spreadtrum's initial host controller") Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-sprd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index 6ee340a3fb3a..603a5d9f045a 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -624,6 +624,7 @@ err_cleanup_host: sdhci_cleanup_host(host); pm_runtime_disable: + pm_runtime_put_noidle(&pdev->dev); pm_runtime_disable(&pdev->dev); pm_runtime_set_suspended(&pdev->dev); From ba2d139b02ba684c6c101de42fed782d6cd2b997 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 8 Jul 2019 12:56:13 -0700 Subject: [PATCH 005/277] mmc: dw_mmc: Fix occasional hang after tuning on eMMC In commit 46d179525a1f ("mmc: dw_mmc: Wait for data transfer after response errors.") we fixed a tuning-induced hang that I saw when stress testing tuning on certain SD cards. I won't re-hash that whole commit, but the summary is that as a normal part of tuning you need to deal with transfer errors and there were cases where these transfer errors was putting my system into a bad state causing all future transfers to fail. That commit fixed handling of the transfer errors for me. In downstream Chrome OS my fix landed and had the same behavior for all SD/MMC commands. However, it looks like when the commit landed upstream we limited it to only SD tuning commands. Presumably this was to try to get around problems that Alim Akhtar reported on exynos [1]. Unfortunately while stress testing reboots (and suspend/resume) on some rk3288-based Chromebooks I found the same problem on the eMMC on some of my Chromebooks (the ones with Hynix eMMC). Since the eMMC tuning command is different (MMC_SEND_TUNING_BLOCK_HS200 vs. MMC_SEND_TUNING_BLOCK) we were basically getting back into the same situation. I'm hoping that whatever problems exynos was having in the past are somehow magically fixed now and we can make the behavior the same for all commands. [1] https://lkml.kernel.org/r/CAGOxZ53WfNbaMe0_AM0qBqU47kAfgmPBVZC8K8Y-_J3mDMqW4A@mail.gmail.com Fixes: 46d179525a1f ("mmc: dw_mmc: Wait for data transfer after response errors.") Signed-off-by: Douglas Anderson Cc: Marek Szyprowski Cc: Alim Akhtar Cc: Enric Balletbo i Serra Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index faaaf52a46d2..eea52e2c5a0c 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -2012,8 +2012,7 @@ static void dw_mci_tasklet_func(unsigned long priv) * delayed. Allowing the transfer to take place * avoids races and keeps things simple. */ - if ((err != -ETIMEDOUT) && - (cmd->opcode == MMC_SEND_TUNING_BLOCK)) { + if (err != -ETIMEDOUT) { state = STATE_SENDING_DATA; continue; } From 665e985c2f41bebc3e6cee7e04c36a44afbc58f7 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 9 Jul 2019 22:04:19 -0700 Subject: [PATCH 006/277] mmc: meson-mx-sdio: Fix misuse of GENMASK macro Arguments are supposed to be ordered high then low. Signed-off-by: Joe Perches Reviewed-by: Neil Armstrong Fixes: ed80a13bb4c4 ("mmc: meson-mx-sdio: Add a driver for the Amlogic Meson8 and Meson8b SoCs") Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/meson-mx-sdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/meson-mx-sdio.c b/drivers/mmc/host/meson-mx-sdio.c index 2d736e416775..ba9a63db73da 100644 --- a/drivers/mmc/host/meson-mx-sdio.c +++ b/drivers/mmc/host/meson-mx-sdio.c @@ -73,7 +73,7 @@ #define MESON_MX_SDIO_IRQC_IF_CONFIG_MASK GENMASK(7, 6) #define MESON_MX_SDIO_IRQC_FORCE_DATA_CLK BIT(8) #define MESON_MX_SDIO_IRQC_FORCE_DATA_CMD BIT(9) - #define MESON_MX_SDIO_IRQC_FORCE_DATA_DAT_MASK GENMASK(10, 13) + #define MESON_MX_SDIO_IRQC_FORCE_DATA_DAT_MASK GENMASK(13, 10) #define MESON_MX_SDIO_IRQC_SOFT_RESET BIT(15) #define MESON_MX_SDIO_IRQC_FORCE_HALT BIT(30) #define MESON_MX_SDIO_IRQC_HALT_HOLE BIT(31) From 3a6ffb3c8c3274a39dc8f2514526e645c5d21753 Mon Sep 17 00:00:00 2001 From: Andreas Koop Date: Mon, 22 Jul 2019 12:03:06 +0800 Subject: [PATCH 007/277] mmc: mmc_spi: Enable stable writes While using the mmc_spi driver occasionally errors like this popped up: mmcblk0: error -84 transferring data end_request: I/O error, dev mmcblk0, sector 581756 I looked on the Internet for occurrences of the same problem and came across a helpful post [1]. It includes source code to reproduce the bug. There is also an analysis about the cause. During transmission data in the supplied buffer is being modified. Thus the previously calculated checksum is not correct anymore. After some digging I found out that device drivers are supposed to report they need stable writes. To fix this I set the appropriate flag at queue initialization if CRC checksumming is enabled for that SPI host. [1] https://groups.google.com/forum/#!msg/sim1/gLlzWeXGFr8/KevXinUXfc8J Signed-off-by: Andreas Koop [shihpo: Rebase on top of v5.3-rc1] Signed-off-by: ShihPo Hung Cc: Paul Walmsley CC: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/core/queue.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index e327f80ebe70..7102e2ebc614 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -427,6 +428,10 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card) goto free_tag_set; } + if (mmc_host_is_spi(host) && host->use_spi_crc) + mq->queue->backing_dev_info->capabilities |= + BDI_CAP_STABLE_WRITES; + mq->queue->queuedata = mq; blk_queue_rq_timeout(mq->queue, 60 * HZ); From 223ecaf140b1dd1c1d2a1a1d96281efc5c906984 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Mon, 8 Jul 2019 13:23:08 +0800 Subject: [PATCH 008/277] gpiolib: fix incorrect IRQ requesting of an active-low lineevent When a pin is active-low, logical trigger edge should be inverted to match the same interrupt opportunity. For example, a button pushed triggers falling edge in ACTIVE_HIGH case; in ACTIVE_LOW case, the button pushed triggers rising edge. For user space the IRQ requesting doesn't need to do any modification except to configuring GPIOHANDLE_REQUEST_ACTIVE_LOW. For example, we want to catch the event when the button is pushed. The button on the original board drives level to be low when it is pushed, and drives level to be high when it is released. In user space we can do: req.handleflags = GPIOHANDLE_REQUEST_INPUT; req.eventflags = GPIOEVENT_REQUEST_FALLING_EDGE; while (1) { read(fd, &dat, sizeof(dat)); if (dat.id == GPIOEVENT_EVENT_FALLING_EDGE) printf("button pushed\n"); } Run the same logic on another board which the polarity of the button is inverted; it drives level to be high when pushed, and level to be low when released. For this inversion we add flag GPIOHANDLE_REQUEST_ACTIVE_LOW: req.handleflags = GPIOHANDLE_REQUEST_INPUT | GPIOHANDLE_REQUEST_ACTIVE_LOW; req.eventflags = GPIOEVENT_REQUEST_FALLING_EDGE; At the result, there are no any events caught when the button is pushed. By the way, button releasing will emit a "falling" event. The timing of "falling" catching is not expected. Cc: stable@vger.kernel.org Signed-off-by: Michael Wu Tested-by: Bartosz Golaszewski Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 3ee99d070608..bf05c29b53be 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -956,9 +956,11 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) } if (eflags & GPIOEVENT_REQUEST_RISING_EDGE) - irqflags |= IRQF_TRIGGER_RISING; + irqflags |= test_bit(FLAG_ACTIVE_LOW, &desc->flags) ? + IRQF_TRIGGER_FALLING : IRQF_TRIGGER_RISING; if (eflags & GPIOEVENT_REQUEST_FALLING_EDGE) - irqflags |= IRQF_TRIGGER_FALLING; + irqflags |= test_bit(FLAG_ACTIVE_LOW, &desc->flags) ? + IRQF_TRIGGER_RISING : IRQF_TRIGGER_FALLING; irqflags |= IRQF_ONESHOT; INIT_KFIFO(le->events); From dd422906799f240bfd400a5d376aa43f7b89c38a Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 18 Jul 2019 17:27:20 +0800 Subject: [PATCH 009/277] mm/balloon_compaction: avoid duplicate page removal A #GP is reported in the guest when requesting balloon inflation via virtio-balloon. The reason is that the virtio-balloon driver has removed the page from its internal page list (via balloon_page_pop), but balloon_page_enqueue_one also calls "list_del" to do the removal. This is necessary when it's used from balloon_page_enqueue_list, but not from balloon_page_enqueue. Move list_del to balloon_page_enqueue, and update comments accordingly. Fixes: 418a3ab1e778 (mm/balloon_compaction: List interfaces) Signed-off-by: Wei Wang Signed-off-by: Michael S. Tsirkin --- mm/balloon_compaction.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index 83a7b614061f..d25664e1857b 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -21,7 +21,6 @@ static void balloon_page_enqueue_one(struct balloon_dev_info *b_dev_info, * memory corruption is possible and we should stop execution. */ BUG_ON(!trylock_page(page)); - list_del(&page->lru); balloon_page_insert(b_dev_info, page); unlock_page(page); __count_vm_event(BALLOON_INFLATE); @@ -47,6 +46,7 @@ size_t balloon_page_list_enqueue(struct balloon_dev_info *b_dev_info, spin_lock_irqsave(&b_dev_info->pages_lock, flags); list_for_each_entry_safe(page, tmp, pages, lru) { + list_del(&page->lru); balloon_page_enqueue_one(b_dev_info, page); n_pages++; } @@ -128,13 +128,19 @@ struct page *balloon_page_alloc(void) EXPORT_SYMBOL_GPL(balloon_page_alloc); /* - * balloon_page_enqueue - allocates a new page and inserts it into the balloon - * page list. + * balloon_page_enqueue - inserts a new page into the balloon page list. + * * @b_dev_info: balloon device descriptor where we will insert a new page to * @page: new page to enqueue - allocated using balloon_page_alloc. * * Driver must call it to properly enqueue a new allocated balloon page * before definitively removing it from the guest system. + * + * Drivers must not call balloon_page_enqueue on pages that have been + * pushed to a list with balloon_page_push before removing them with + * balloon_page_pop. To all pages on a list, use balloon_page_list_enqueue + * instead. + * * This function returns the page address for the recently enqueued page or * NULL in the case we fail to allocate a new page this turn. */ From cfe61801b0f11eb561f1adf452d995efaafbc68b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 18 Jul 2019 08:19:24 -0400 Subject: [PATCH 010/277] balloon: fix up comments Lots of comments bitrotted. Fix them up. Fixes: 418a3ab1e778 (mm/balloon_compaction: List interfaces) Reviewed-by: Wei Wang Signed-off-by: Michael S. Tsirkin Reviewed-by: Ralph Campbell Acked-by: Nadav Amit --- mm/balloon_compaction.c | 67 +++++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 30 deletions(-) diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index d25664e1857b..798275a51887 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -32,8 +32,8 @@ static void balloon_page_enqueue_one(struct balloon_dev_info *b_dev_info, * @b_dev_info: balloon device descriptor where we will insert a new page to * @pages: pages to enqueue - allocated using balloon_page_alloc. * - * Driver must call it to properly enqueue a balloon pages before definitively - * removing it from the guest system. + * Driver must call this function to properly enqueue balloon pages before + * definitively removing them from the guest system. * * Return: number of pages that were enqueued. */ @@ -63,12 +63,13 @@ EXPORT_SYMBOL_GPL(balloon_page_list_enqueue); * @n_req_pages: number of requested pages. * * Driver must call this function to properly de-allocate a previous enlisted - * balloon pages before definetively releasing it back to the guest system. + * balloon pages before definitively releasing it back to the guest system. * This function tries to remove @n_req_pages from the ballooned pages and * return them to the caller in the @pages list. * - * Note that this function may fail to dequeue some pages temporarily empty due - * to compaction isolated pages. + * Note that this function may fail to dequeue some pages even if the balloon + * isn't empty - since the page list can be temporarily empty due to compaction + * of isolated pages. * * Return: number of pages that were added to the @pages list. */ @@ -112,12 +113,13 @@ EXPORT_SYMBOL_GPL(balloon_page_list_dequeue); /* * balloon_page_alloc - allocates a new page for insertion into the balloon - * page list. + * page list. * - * Driver must call it to properly allocate a new enlisted balloon page. - * Driver must call balloon_page_enqueue before definitively removing it from - * the guest system. This function returns the page address for the recently - * allocated page or NULL in the case we fail to allocate a new page this turn. + * Driver must call this function to properly allocate a new balloon page. + * Driver must call balloon_page_enqueue before definitively removing the page + * from the guest system. + * + * Return: struct page for the allocated page or NULL on allocation failure. */ struct page *balloon_page_alloc(void) { @@ -130,19 +132,15 @@ EXPORT_SYMBOL_GPL(balloon_page_alloc); /* * balloon_page_enqueue - inserts a new page into the balloon page list. * - * @b_dev_info: balloon device descriptor where we will insert a new page to + * @b_dev_info: balloon device descriptor where we will insert a new page * @page: new page to enqueue - allocated using balloon_page_alloc. * - * Driver must call it to properly enqueue a new allocated balloon page - * before definitively removing it from the guest system. + * Drivers must call this function to properly enqueue a new allocated balloon + * page before definitively removing the page from the guest system. * - * Drivers must not call balloon_page_enqueue on pages that have been - * pushed to a list with balloon_page_push before removing them with - * balloon_page_pop. To all pages on a list, use balloon_page_list_enqueue - * instead. - * - * This function returns the page address for the recently enqueued page or - * NULL in the case we fail to allocate a new page this turn. + * Drivers must not call balloon_page_enqueue on pages that have been pushed to + * a list with balloon_page_push before removing them with balloon_page_pop. To + * enqueue a list of pages, use balloon_page_list_enqueue instead. */ void balloon_page_enqueue(struct balloon_dev_info *b_dev_info, struct page *page) @@ -157,14 +155,23 @@ EXPORT_SYMBOL_GPL(balloon_page_enqueue); /* * balloon_page_dequeue - removes a page from balloon's page list and returns - * the its address to allow the driver release the page. + * its address to allow the driver to release the page. * @b_dev_info: balloon device decriptor where we will grab a page from. * - * Driver must call it to properly de-allocate a previous enlisted balloon page - * before definetively releasing it back to the guest system. - * This function returns the page address for the recently dequeued page or - * NULL in the case we find balloon's page list temporarily empty due to - * compaction isolated pages. + * Driver must call this function to properly dequeue a previously enqueued page + * before definitively releasing it back to the guest system. + * + * Caller must perform its own accounting to ensure that this + * function is called only if some pages are actually enqueued. + * + * Note that this function may fail to dequeue some pages even if there are + * some enqueued pages - since the page list can be temporarily empty due to + * the compaction of isolated pages. + * + * TODO: remove the caller accounting requirements, and allow caller to wait + * until all pages can be dequeued. + * + * Return: struct page for the dequeued page, or NULL if no page was dequeued. */ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info) { @@ -177,9 +184,9 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info) if (n_pages != 1) { /* * If we are unable to dequeue a balloon page because the page - * list is empty and there is no isolated pages, then something + * list is empty and there are no isolated pages, then something * went out of track and some balloon pages are lost. - * BUG() here, otherwise the balloon driver may get stuck into + * BUG() here, otherwise the balloon driver may get stuck in * an infinite loop while attempting to release all its pages. */ spin_lock_irqsave(&b_dev_info->pages_lock, flags); @@ -230,8 +237,8 @@ int balloon_page_migrate(struct address_space *mapping, /* * We can not easily support the no copy case here so ignore it as it - * is unlikely to be use with ballon pages. See include/linux/hmm.h for - * user of the MIGRATE_SYNC_NO_COPY mode. + * is unlikely to be used with balloon pages. See include/linux/hmm.h + * for a user of the MIGRATE_SYNC_NO_COPY mode. */ if (mode == MIGRATE_SYNC_NO_COPY) return -EINVAL; From ae24fb49d01103c80d6ff3b78714259c1c62c958 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 22 Jul 2019 15:40:07 +0100 Subject: [PATCH 011/277] iommu/virtio: Update to most recent specification Following specification review a few things were changed in v8 of the virtio-iommu series [1], but have been omitted when merging the base driver. Add them now: * Remove the EXEC flag. * Add feature bit for the MMIO flag. * Change domain_bits to domain_range. * Add NOMEM status flag. [1] https://lore.kernel.org/linux-iommu/20190530170929.19366-1-jean-philippe.brucker@arm.com/ Fixes: edcd69ab9a32 ("iommu: Add virtio-iommu driver") Reported-by: Eric Auger Signed-off-by: Jean-Philippe Brucker Signed-off-by: Michael S. Tsirkin Reviewed-by: Eric Auger Tested-by: Eric Auger Acked-by: Joerg Roedel --- drivers/iommu/virtio-iommu.c | 40 ++++++++++++++++++++++--------- include/uapi/linux/virtio_iommu.h | 32 ++++++++++++++----------- 2 files changed, 47 insertions(+), 25 deletions(-) diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 433f4d2ee956..80a740df0737 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -2,7 +2,7 @@ /* * Virtio driver for the paravirtualized IOMMU * - * Copyright (C) 2018 Arm Limited + * Copyright (C) 2019 Arm Limited */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -47,7 +47,10 @@ struct viommu_dev { /* Device configuration */ struct iommu_domain_geometry geometry; u64 pgsize_bitmap; - u8 domain_bits; + u32 first_domain; + u32 last_domain; + /* Supported MAP flags */ + u32 map_flags; u32 probe_size; }; @@ -62,6 +65,7 @@ struct viommu_domain { struct viommu_dev *viommu; struct mutex mutex; /* protects viommu pointer */ unsigned int id; + u32 map_flags; spinlock_t mappings_lock; struct rb_root_cached mappings; @@ -113,6 +117,8 @@ static int viommu_get_req_errno(void *buf, size_t len) return -ENOENT; case VIRTIO_IOMMU_S_FAULT: return -EFAULT; + case VIRTIO_IOMMU_S_NOMEM: + return -ENOMEM; case VIRTIO_IOMMU_S_IOERR: case VIRTIO_IOMMU_S_DEVERR: default: @@ -607,15 +613,15 @@ static int viommu_domain_finalise(struct viommu_dev *viommu, { int ret; struct viommu_domain *vdomain = to_viommu_domain(domain); - unsigned int max_domain = viommu->domain_bits > 31 ? ~0 : - (1U << viommu->domain_bits) - 1; vdomain->viommu = viommu; + vdomain->map_flags = viommu->map_flags; domain->pgsize_bitmap = viommu->pgsize_bitmap; domain->geometry = viommu->geometry; - ret = ida_alloc_max(&viommu->domain_ids, max_domain, GFP_KERNEL); + ret = ida_alloc_range(&viommu->domain_ids, viommu->first_domain, + viommu->last_domain, GFP_KERNEL); if (ret >= 0) vdomain->id = (unsigned int)ret; @@ -710,7 +716,7 @@ static int viommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { int ret; - int flags; + u32 flags; struct virtio_iommu_req_map map; struct viommu_domain *vdomain = to_viommu_domain(domain); @@ -718,6 +724,9 @@ static int viommu_map(struct iommu_domain *domain, unsigned long iova, (prot & IOMMU_WRITE ? VIRTIO_IOMMU_MAP_F_WRITE : 0) | (prot & IOMMU_MMIO ? VIRTIO_IOMMU_MAP_F_MMIO : 0); + if (flags & ~vdomain->map_flags) + return -EINVAL; + ret = viommu_add_mapping(vdomain, iova, paddr, size, flags); if (ret) return ret; @@ -1027,7 +1036,8 @@ static int viommu_probe(struct virtio_device *vdev) goto err_free_vqs; } - viommu->domain_bits = 32; + viommu->map_flags = VIRTIO_IOMMU_MAP_F_READ | VIRTIO_IOMMU_MAP_F_WRITE; + viommu->last_domain = ~0U; /* Optional features */ virtio_cread_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE, @@ -1038,9 +1048,13 @@ static int viommu_probe(struct virtio_device *vdev) struct virtio_iommu_config, input_range.end, &input_end); - virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_BITS, - struct virtio_iommu_config, domain_bits, - &viommu->domain_bits); + virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE, + struct virtio_iommu_config, domain_range.start, + &viommu->first_domain); + + virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE, + struct virtio_iommu_config, domain_range.end, + &viommu->last_domain); virtio_cread_feature(vdev, VIRTIO_IOMMU_F_PROBE, struct virtio_iommu_config, probe_size, @@ -1052,6 +1066,9 @@ static int viommu_probe(struct virtio_device *vdev) .force_aperture = true, }; + if (virtio_has_feature(vdev, VIRTIO_IOMMU_F_MMIO)) + viommu->map_flags |= VIRTIO_IOMMU_MAP_F_MMIO; + viommu_ops.pgsize_bitmap = viommu->pgsize_bitmap; virtio_device_ready(vdev); @@ -1130,9 +1147,10 @@ static void viommu_config_changed(struct virtio_device *vdev) static unsigned int features[] = { VIRTIO_IOMMU_F_MAP_UNMAP, - VIRTIO_IOMMU_F_DOMAIN_BITS, VIRTIO_IOMMU_F_INPUT_RANGE, + VIRTIO_IOMMU_F_DOMAIN_RANGE, VIRTIO_IOMMU_F_PROBE, + VIRTIO_IOMMU_F_MMIO, }; static struct virtio_device_id id_table[] = { diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h index ba1b460c9944..237e36a280cb 100644 --- a/include/uapi/linux/virtio_iommu.h +++ b/include/uapi/linux/virtio_iommu.h @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* - * Virtio-iommu definition v0.9 + * Virtio-iommu definition v0.12 * - * Copyright (C) 2018 Arm Ltd. + * Copyright (C) 2019 Arm Ltd. */ #ifndef _UAPI_LINUX_VIRTIO_IOMMU_H #define _UAPI_LINUX_VIRTIO_IOMMU_H @@ -11,26 +11,31 @@ /* Feature bits */ #define VIRTIO_IOMMU_F_INPUT_RANGE 0 -#define VIRTIO_IOMMU_F_DOMAIN_BITS 1 +#define VIRTIO_IOMMU_F_DOMAIN_RANGE 1 #define VIRTIO_IOMMU_F_MAP_UNMAP 2 #define VIRTIO_IOMMU_F_BYPASS 3 #define VIRTIO_IOMMU_F_PROBE 4 +#define VIRTIO_IOMMU_F_MMIO 5 -struct virtio_iommu_range { - __u64 start; - __u64 end; +struct virtio_iommu_range_64 { + __le64 start; + __le64 end; +}; + +struct virtio_iommu_range_32 { + __le32 start; + __le32 end; }; struct virtio_iommu_config { /* Supported page sizes */ - __u64 page_size_mask; + __le64 page_size_mask; /* Supported IOVA range */ - struct virtio_iommu_range input_range; + struct virtio_iommu_range_64 input_range; /* Max domain ID size */ - __u8 domain_bits; - __u8 padding[3]; + struct virtio_iommu_range_32 domain_range; /* Probe buffer size */ - __u32 probe_size; + __le32 probe_size; }; /* Request types */ @@ -49,6 +54,7 @@ struct virtio_iommu_config { #define VIRTIO_IOMMU_S_RANGE 0x05 #define VIRTIO_IOMMU_S_NOENT 0x06 #define VIRTIO_IOMMU_S_FAULT 0x07 +#define VIRTIO_IOMMU_S_NOMEM 0x08 struct virtio_iommu_req_head { __u8 type; @@ -78,12 +84,10 @@ struct virtio_iommu_req_detach { #define VIRTIO_IOMMU_MAP_F_READ (1 << 0) #define VIRTIO_IOMMU_MAP_F_WRITE (1 << 1) -#define VIRTIO_IOMMU_MAP_F_EXEC (1 << 2) -#define VIRTIO_IOMMU_MAP_F_MMIO (1 << 3) +#define VIRTIO_IOMMU_MAP_F_MMIO (1 << 2) #define VIRTIO_IOMMU_MAP_F_MASK (VIRTIO_IOMMU_MAP_F_READ | \ VIRTIO_IOMMU_MAP_F_WRITE | \ - VIRTIO_IOMMU_MAP_F_EXEC | \ VIRTIO_IOMMU_MAP_F_MMIO) struct virtio_iommu_req_map { From d5121ffebc38a16b2419b664e466a2f3e5c7b457 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 18 Jul 2019 09:27:10 +0000 Subject: [PATCH 012/277] RDMA/siw: Fix error return code in siw_init_module() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: bdcf26bf9b3a ("rdma/siw: network and RDMA core interface") Link: https://lore.kernel.org/r/20190718092710.85709-1-weiyongjun1@huawei.com Signed-off-by: Wei Yongjun Reviewed-by: Bernard Metzler Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index f55c4e80aea4..d0f140daf659 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -612,6 +612,7 @@ static __init int siw_init_module(void) if (!siw_create_tx_threads()) { pr_info("siw: Could not start any TX thread\n"); + rv = -ENOMEM; goto out_error; } /* From 60c3becfd1a138fdcfe48f2a5ef41ef0078d481e Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Thu, 11 Jul 2019 09:32:17 +0800 Subject: [PATCH 013/277] RDMA/hns: Fix sg offset non-zero issue When run perftest in many times, the system will report a BUG as follows: BUG: Bad rss-counter state mm:(____ptrval____) idx:0 val:-1 BUG: Bad rss-counter state mm:(____ptrval____) idx:1 val:1 We tested with different kernel version and found it started from the the following commit: commit d10bcf947a3e ("RDMA/umem: Combine contiguous PAGE_SIZE regions in SGEs") In this commit, the sg->offset is always 0 when sg_set_page() is called in ib_umem_get() and the drivers are not allowed to change the sgl, otherwise it will get bad page descriptor when unfolding SGEs in __ib_umem_release() as sg_page_count() will get wrong result while sgl->offset is not 0. However, there is a weird sgl usage in the current hns driver, the driver modified sg->offset after calling ib_umem_get(), which caused we iterate past the wrong number of pages in for_each_sg_page iterator. This patch fixes it by correcting the non-standard sgl usage found in the hns_roce_db_map_user() function. Fixes: d10bcf947a3e ("RDMA/umem: Combine contiguous PAGE_SIZE regions in SGEs") Fixes: 0425e3e6e0c7 ("RDMA/hns: Support flush cqe for hip08 in kernel space") Link: https://lore.kernel.org/r/1562808737-45723-1-git-send-email-oulijun@huawei.com Signed-off-by: Xi Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_db.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c index 627aa46ef683..c00714c2f16a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_db.c +++ b/drivers/infiniband/hw/hns/hns_roce_db.c @@ -12,13 +12,15 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, struct ib_udata *udata, unsigned long virt, struct hns_roce_db *db) { + unsigned long page_addr = virt & PAGE_MASK; struct hns_roce_user_db_page *page; + unsigned int offset; int ret = 0; mutex_lock(&context->page_mutex); list_for_each_entry(page, &context->page_list, list) - if (page->user_virt == (virt & PAGE_MASK)) + if (page->user_virt == page_addr) goto found; page = kmalloc(sizeof(*page), GFP_KERNEL); @@ -28,8 +30,8 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, } refcount_set(&page->refcount, 1); - page->user_virt = (virt & PAGE_MASK); - page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0); + page->user_virt = page_addr; + page->umem = ib_umem_get(udata, page_addr, PAGE_SIZE, 0, 0); if (IS_ERR(page->umem)) { ret = PTR_ERR(page->umem); kfree(page); @@ -39,10 +41,9 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, list_add(&page->list, &context->page_list); found: - db->dma = sg_dma_address(page->umem->sg_head.sgl) + - (virt & ~PAGE_MASK); - page->umem->sg_head.sgl->offset = virt & ~PAGE_MASK; - db->virt_addr = sg_virt(page->umem->sg_head.sgl); + offset = virt - page_addr; + db->dma = sg_dma_address(page->umem->sg_head.sgl) + offset; + db->virt_addr = sg_virt(page->umem->sg_head.sgl) + offset; db->u.user_page = page; refcount_inc(&page->refcount); From cd48a82087231fdba0e77521102386c6ed0168d6 Mon Sep 17 00:00:00 2001 From: John Fleck Date: Mon, 15 Jul 2019 12:45:21 -0400 Subject: [PATCH 014/277] IB/hfi1: Check for error on call to alloc_rsm_map_table The call to alloc_rsm_map_table does not check if the kmalloc fails. Check for a NULL on alloc, and bail if it fails. Fixes: 372cc85a13c9 ("IB/hfi1: Extract RSM map table init from QOS") Link: https://lore.kernel.org/r/20190715164521.74174.27047.stgit@awfm-01.aw.intel.com Cc: Reviewed-by: Mike Marciniszyn Signed-off-by: John Fleck Signed-off-by: Mike Marciniszyn Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index d5b643a1d9fd..67052dc3100c 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -14452,7 +14452,7 @@ void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd) clear_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK); } -static void init_rxe(struct hfi1_devdata *dd) +static int init_rxe(struct hfi1_devdata *dd) { struct rsm_map_table *rmt; u64 val; @@ -14461,6 +14461,9 @@ static void init_rxe(struct hfi1_devdata *dd) write_csr(dd, RCV_ERR_MASK, ~0ull); rmt = alloc_rsm_map_table(dd); + if (!rmt) + return -ENOMEM; + /* set up QOS, including the QPN map table */ init_qos(dd, rmt); init_fecn_handling(dd, rmt); @@ -14487,6 +14490,7 @@ static void init_rxe(struct hfi1_devdata *dd) val |= ((4ull & RCV_BYPASS_HDR_SIZE_MASK) << RCV_BYPASS_HDR_SIZE_SHIFT); write_csr(dd, RCV_BYPASS, val); + return 0; } static void init_other(struct hfi1_devdata *dd) @@ -15024,7 +15028,10 @@ int hfi1_init_dd(struct hfi1_devdata *dd) goto bail_cleanup; /* set initial RXE CSRs */ - init_rxe(dd); + ret = init_rxe(dd); + if (ret) + goto bail_cleanup; + /* set initial TXE CSRs */ init_txe(dd); /* set initial non-RXE, non-TXE CSRs */ From 2b74c878b0eae4c32629c2d5ba69a29f69048313 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 15 Jul 2019 12:45:28 -0400 Subject: [PATCH 015/277] IB/hfi1: Unreserve a flushed OPFN request When an OPFN request is flushed, the request is completed without unreserving itself from the send queue. Subsequently, when a new request is post sent, the following warning will be triggered: WARNING: CPU: 4 PID: 8130 at rdmavt/qp.c:1761 rvt_post_send+0x72a/0x880 [rdmavt] Call Trace: [] dump_stack+0x19/0x1b [] __warn+0xd8/0x100 [] warn_slowpath_null+0x1d/0x20 [] rvt_post_send+0x72a/0x880 [rdmavt] [] ? account_entity_dequeue+0xae/0xd0 [] ? __kmalloc+0x55/0x230 [] ib_uverbs_post_send+0x37c/0x5d0 [ib_uverbs] [] ? rdma_lookup_put_uobject+0x26/0x60 [ib_uverbs] [] ib_uverbs_write+0x286/0x460 [ib_uverbs] [] ? security_file_permission+0x27/0xa0 [] vfs_write+0xc0/0x1f0 [] SyS_write+0x7f/0xf0 [] system_call_fastpath+0x22/0x27 This patch fixes the problem by moving rvt_qp_wqe_unreserve() into rvt_qp_complete_swqe() to simplify the code and make it less error-prone. Fixes: ca95f802ef51 ("IB/hfi1: Unreserve a reserved request when it is completed") Link: https://lore.kernel.org/r/20190715164528.74174.31364.stgit@awfm-01.aw.intel.com Cc: Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/rc.c | 2 -- include/rdma/rdmavt_qp.h | 9 ++++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 0477c14633ab..024a7c2b6124 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1835,7 +1835,6 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah) cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) break; trdma_clean_swqe(qp, wqe); - rvt_qp_wqe_unreserve(qp, wqe); trace_hfi1_qp_send_completion(qp, wqe, qp->s_last); rvt_qp_complete_swqe(qp, wqe, @@ -1882,7 +1881,6 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, if (cmp_psn(wqe->lpsn, qp->s_sending_psn) < 0 || cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { trdma_clean_swqe(qp, wqe); - rvt_qp_wqe_unreserve(qp, wqe); trace_hfi1_qp_send_completion(qp, wqe, qp->s_last); rvt_qp_complete_swqe(qp, wqe, diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 0eeea520a853..e06c77d76463 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -608,7 +608,7 @@ static inline void rvt_qp_wqe_reserve( /** * rvt_qp_wqe_unreserve - clean reserved operation * @qp - the rvt qp - * @wqe - the send wqe + * @flags - send wqe flags * * This decrements the reserve use count. * @@ -620,11 +620,9 @@ static inline void rvt_qp_wqe_reserve( * the compiler does not juggle the order of the s_last * ring index and the decrementing of s_reserved_used. */ -static inline void rvt_qp_wqe_unreserve( - struct rvt_qp *qp, - struct rvt_swqe *wqe) +static inline void rvt_qp_wqe_unreserve(struct rvt_qp *qp, int flags) { - if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED)) { + if (unlikely(flags & RVT_SEND_RESERVE_USED)) { atomic_dec(&qp->s_reserved_used); /* insure no compiler re-order up to s_last change */ smp_mb__after_atomic(); @@ -853,6 +851,7 @@ rvt_qp_complete_swqe(struct rvt_qp *qp, u32 byte_len, last; int flags = wqe->wr.send_flags; + rvt_qp_wqe_unreserve(qp, flags); rvt_put_qp_swqe(qp, wqe); need_completion = From dc25b239ebeaa3c58e5ceaa732140427d386aa16 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 15 Jul 2019 12:45:34 -0400 Subject: [PATCH 016/277] IB/hfi1: Field not zero-ed when allocating TID flow memory The field flow->resync_npkts is added for TID RDMA WRITE request and zero-ed when a TID RDMA WRITE RESP packet is received by the requester. This field is used to rewind a request during retry in the function hfi1_tid_rdma_restart_req() shared by both TID RDMA WRITE and TID RDMA READ requests. Therefore, when a TID RDMA READ request is retried, this field may not be initialized at all, which causes the retry to start at an incorrect psn, leading to the drop of the retry request by the responder. This patch fixes the problem by zeroing out the field when the flow memory is allocated. Fixes: 838b6fd2d9ca ("IB/hfi1: TID RDMA RcvArray programming and TID allocation") Cc: Link: https://lore.kernel.org/r/20190715164534.74174.6177.stgit@awfm-01.aw.intel.com Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/tid_rdma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 92acccaaaa86..7fcbeee84293 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -1620,6 +1620,7 @@ static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req, flows[i].req = req; flows[i].npagesets = 0; flows[i].pagesets[0].mapped = 0; + flows[i].resync_npkts = 0; } req->flows = flows; return 0; From f4d46119f214f9a7620b0d18b153d7e0e8c90b4f Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 15 Jul 2019 12:45:40 -0400 Subject: [PATCH 017/277] IB/hfi1: Drop all TID RDMA READ RESP packets after r_next_psn When a TID sequence error occurs while receiving TID RDMA READ RESP packets, all packets after flow->flow_state.r_next_psn should be dropped, including those response packets for subsequent segments. The current implementation will drop the subsequent response packets for the segment to complete next, but may accept packets for subsequent segments and therefore mistakenly advance the r_next_psn fields for the corresponding software flows. This may result in failures to complete subsequent segments after the current segment is completed. The fix is to only use the flow pointed by req->clear_tail for checking KDETH PSN instead of finding a flow from the request's flow array. Fixes: b885d5be9ca1 ("IB/hfi1: Unify the software PSN check for TID RDMA READ/WRITE") Cc: Link: https://lore.kernel.org/r/20190715164540.74174.54702.stgit@awfm-01.aw.intel.com Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/tid_rdma.c | 42 +-------------------------- 1 file changed, 1 insertion(+), 41 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 7fcbeee84293..996fc298207e 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -1674,34 +1674,6 @@ static struct tid_rdma_flow *find_flow_ib(struct tid_rdma_request *req, return NULL; } -static struct tid_rdma_flow * -__find_flow_ranged(struct tid_rdma_request *req, u16 head, u16 tail, - u32 psn, u16 *fidx) -{ - for ( ; CIRC_CNT(head, tail, MAX_FLOWS); - tail = CIRC_NEXT(tail, MAX_FLOWS)) { - struct tid_rdma_flow *flow = &req->flows[tail]; - u32 spsn, lpsn; - - spsn = full_flow_psn(flow, flow->flow_state.spsn); - lpsn = full_flow_psn(flow, flow->flow_state.lpsn); - - if (cmp_psn(psn, spsn) >= 0 && cmp_psn(psn, lpsn) <= 0) { - if (fidx) - *fidx = tail; - return flow; - } - } - return NULL; -} - -static struct tid_rdma_flow *find_flow(struct tid_rdma_request *req, - u32 psn, u16 *fidx) -{ - return __find_flow_ranged(req, req->setup_head, req->clear_tail, psn, - fidx); -} - /* TID RDMA READ functions */ u32 hfi1_build_tid_rdma_read_packet(struct rvt_swqe *wqe, struct ib_other_headers *ohdr, u32 *bth1, @@ -2789,19 +2761,7 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, * to prevent continuous Flow Sequence errors for any * packets that could be still in the fabric. */ - flow = find_flow(req, psn, NULL); - if (!flow) { - /* - * We can't find the IB PSN matching the - * received KDETH PSN. The only thing we can - * do at this point is report the error to - * the QP. - */ - hfi1_kern_read_tid_flow_free(qp); - spin_unlock(&qp->s_lock); - rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR); - return ret; - } + flow = &req->flows[req->clear_tail]; if (priv->s_flags & HFI1_R_TID_SW_PSN) { diff = cmp_psn(psn, flow->flow_state.r_next_psn); From c56b593d2af4cbd189c6af5fd6790728fade80cc Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Mon, 15 Jul 2019 05:19:13 -0400 Subject: [PATCH 018/277] RDMA/bnxt_re: Honor vlan_id in GID entry comparison A GID entry consists of GID, vlan, netdev and smac. Extend GID duplicate check comparisons to consider vlan_id as well to support IPv6 VLAN based link local addresses. Introduce a new structure (bnxt_qplib_gid_info) to hold gid and vlan_id information. The issue is discussed in the following thread https://lore.kernel.org/r/AM0PR05MB4866CFEDCDF3CDA1D7D18AA5D1F20@AM0PR05MB4866.eurprd05.prod.outlook.com Fixes: 823b23da7113 ("IB/core: Allow vlan link local address based RoCE GIDs") Cc: # v5.2+ Link: https://lore.kernel.org/r/20190715091913.15726-1-selvin.xavier@broadcom.com Reported-by: Yi Zhang Co-developed-by: Parav Pandit Signed-off-by: Parav Pandit Signed-off-by: Selvin Xavier Tested-by: Yi Zhang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 7 +++++-- drivers/infiniband/hw/bnxt_re/qplib_res.c | 13 +++++++++---- drivers/infiniband/hw/bnxt_re/qplib_res.h | 2 +- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 14 +++++++++----- drivers/infiniband/hw/bnxt_re/qplib_sp.h | 7 ++++++- 5 files changed, 30 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index a91653aabf38..098ab883733e 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -308,6 +308,7 @@ int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context) struct bnxt_re_dev *rdev = to_bnxt_re_dev(attr->device, ibdev); struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl; struct bnxt_qplib_gid *gid_to_del; + u16 vlan_id = 0xFFFF; /* Delete the entry from the hardware */ ctx = *context; @@ -317,7 +318,8 @@ int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context) if (sgid_tbl && sgid_tbl->active) { if (ctx->idx >= sgid_tbl->max) return -EINVAL; - gid_to_del = &sgid_tbl->tbl[ctx->idx]; + gid_to_del = &sgid_tbl->tbl[ctx->idx].gid; + vlan_id = sgid_tbl->tbl[ctx->idx].vlan_id; /* DEL_GID is called in WQ context(netdevice_event_work_handler) * or via the ib_unregister_device path. In the former case QP1 * may not be destroyed yet, in which case just return as FW @@ -335,7 +337,8 @@ int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context) } ctx->refcnt--; if (!ctx->refcnt) { - rc = bnxt_qplib_del_sgid(sgid_tbl, gid_to_del, true); + rc = bnxt_qplib_del_sgid(sgid_tbl, gid_to_del, + vlan_id, true); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to remove GID: %#x", rc); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 37928b1111df..bdbde8e22420 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -488,7 +488,7 @@ static int bnxt_qplib_alloc_sgid_tbl(struct bnxt_qplib_res *res, struct bnxt_qplib_sgid_tbl *sgid_tbl, u16 max) { - sgid_tbl->tbl = kcalloc(max, sizeof(struct bnxt_qplib_gid), GFP_KERNEL); + sgid_tbl->tbl = kcalloc(max, sizeof(*sgid_tbl->tbl), GFP_KERNEL); if (!sgid_tbl->tbl) return -ENOMEM; @@ -526,9 +526,10 @@ static void bnxt_qplib_cleanup_sgid_tbl(struct bnxt_qplib_res *res, for (i = 0; i < sgid_tbl->max; i++) { if (memcmp(&sgid_tbl->tbl[i], &bnxt_qplib_gid_zero, sizeof(bnxt_qplib_gid_zero))) - bnxt_qplib_del_sgid(sgid_tbl, &sgid_tbl->tbl[i], true); + bnxt_qplib_del_sgid(sgid_tbl, &sgid_tbl->tbl[i].gid, + sgid_tbl->tbl[i].vlan_id, true); } - memset(sgid_tbl->tbl, 0, sizeof(struct bnxt_qplib_gid) * sgid_tbl->max); + memset(sgid_tbl->tbl, 0, sizeof(*sgid_tbl->tbl) * sgid_tbl->max); memset(sgid_tbl->hw_id, -1, sizeof(u16) * sgid_tbl->max); memset(sgid_tbl->vlan, 0, sizeof(u8) * sgid_tbl->max); sgid_tbl->active = 0; @@ -537,7 +538,11 @@ static void bnxt_qplib_cleanup_sgid_tbl(struct bnxt_qplib_res *res, static void bnxt_qplib_init_sgid_tbl(struct bnxt_qplib_sgid_tbl *sgid_tbl, struct net_device *netdev) { - memset(sgid_tbl->tbl, 0, sizeof(struct bnxt_qplib_gid) * sgid_tbl->max); + u32 i; + + for (i = 0; i < sgid_tbl->max; i++) + sgid_tbl->tbl[i].vlan_id = 0xffff; + memset(sgid_tbl->hw_id, -1, sizeof(u16) * sgid_tbl->max); } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 30c42c92fac7..fbda11a7ab1a 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -111,7 +111,7 @@ struct bnxt_qplib_pd_tbl { }; struct bnxt_qplib_sgid_tbl { - struct bnxt_qplib_gid *tbl; + struct bnxt_qplib_gid_info *tbl; u16 *hw_id; u16 max; u16 active; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 48793d3512ac..40296b97d21e 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -213,12 +213,12 @@ int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res, index, sgid_tbl->max); return -EINVAL; } - memcpy(gid, &sgid_tbl->tbl[index], sizeof(*gid)); + memcpy(gid, &sgid_tbl->tbl[index].gid, sizeof(*gid)); return 0; } int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, - struct bnxt_qplib_gid *gid, bool update) + struct bnxt_qplib_gid *gid, u16 vlan_id, bool update) { struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl, struct bnxt_qplib_res, @@ -236,7 +236,8 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, return -ENOMEM; } for (index = 0; index < sgid_tbl->max; index++) { - if (!memcmp(&sgid_tbl->tbl[index], gid, sizeof(*gid))) + if (!memcmp(&sgid_tbl->tbl[index].gid, gid, sizeof(*gid)) && + vlan_id == sgid_tbl->tbl[index].vlan_id) break; } if (index == sgid_tbl->max) { @@ -262,8 +263,9 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, if (rc) return rc; } - memcpy(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero, + memcpy(&sgid_tbl->tbl[index].gid, &bnxt_qplib_gid_zero, sizeof(bnxt_qplib_gid_zero)); + sgid_tbl->tbl[index].vlan_id = 0xFFFF; sgid_tbl->vlan[index] = 0; sgid_tbl->active--; dev_dbg(&res->pdev->dev, @@ -296,7 +298,8 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, } free_idx = sgid_tbl->max; for (i = 0; i < sgid_tbl->max; i++) { - if (!memcmp(&sgid_tbl->tbl[i], gid, sizeof(*gid))) { + if (!memcmp(&sgid_tbl->tbl[i], gid, sizeof(*gid)) && + sgid_tbl->tbl[i].vlan_id == vlan_id) { dev_dbg(&res->pdev->dev, "SGID entry already exist in entry %d!\n", i); *index = i; @@ -351,6 +354,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, } /* Add GID to the sgid_tbl */ memcpy(&sgid_tbl->tbl[free_idx], gid, sizeof(*gid)); + sgid_tbl->tbl[free_idx].vlan_id = vlan_id; sgid_tbl->active++; if (vlan_id != 0xFFFF) sgid_tbl->vlan[free_idx] = 1; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 0ec3b12b0bcd..13d9432d5ce2 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -84,6 +84,11 @@ struct bnxt_qplib_gid { u8 data[16]; }; +struct bnxt_qplib_gid_info { + struct bnxt_qplib_gid gid; + u16 vlan_id; +}; + struct bnxt_qplib_ah { struct bnxt_qplib_gid dgid; struct bnxt_qplib_pd *pd; @@ -221,7 +226,7 @@ int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res, struct bnxt_qplib_sgid_tbl *sgid_tbl, int index, struct bnxt_qplib_gid *gid); int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, - struct bnxt_qplib_gid *gid, bool update); + struct bnxt_qplib_gid *gid, u16 vlan_id, bool update); int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, struct bnxt_qplib_gid *gid, u8 *mac, u16 vlan_id, bool update, u32 *index); From b7f406bb883ba7ac3222298f6b44cebc4cfe2dde Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Wed, 17 Jul 2019 16:21:01 +0800 Subject: [PATCH 019/277] IB/mlx5: Replace kfree with kvfree Memory allocated by kvzalloc should not be freed by kfree(), use kvfree() instead. Fixes: 813e90b1aeaa ("IB/mlx5: Add advise_mr() support") Link: https://lore.kernel.org/r/20190717082101.14196-1-hslester96@gmail.com Signed-off-by: Chuhong Yuan Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 5b642d81e617..36ba901cc9a5 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1771,7 +1771,7 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *work) num_pending_prefetch_dec(to_mdev(w->pd->device), w->sg_list, w->num_sge, 0); - kfree(w); + kvfree(w); } int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, @@ -1813,7 +1813,7 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, if (valid_req) queue_work(system_unbound_wq, &work->work); else - kfree(work); + kvfree(work); srcu_read_unlock(&dev->mr_srcu, srcu_key); From af0653d56657340a80622aeb96707f7fc8506225 Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Fri, 19 Jul 2019 09:29:38 +0800 Subject: [PATCH 020/277] RDMA/siw: Remove set but not used variables 'rv' Fixes gcc '-Wunused-but-set-variable' warning: drivers/infiniband/sw/siw/siw_cm.c: In function siw_cep_set_inuse: drivers/infiniband/sw/siw/siw_cm.c:223:6: warning: variable rv set but not used [-Wunused-but-set-variable] Fixes: 6c52fdc244b5 ("rdma/siw: connection management") Link: https://lore.kernel.org/r/20190719012938.100628-1-maowenan@huawei.com Signed-off-by: Mao Wenan Reviewed-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_cm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index a7cde98e73e8..9ce8a1b925d2 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -220,13 +220,12 @@ static void siw_put_work(struct siw_cm_work *work) static void siw_cep_set_inuse(struct siw_cep *cep) { unsigned long flags; - int rv; retry: spin_lock_irqsave(&cep->lock, flags); if (cep->in_use) { spin_unlock_irqrestore(&cep->lock, flags); - rv = wait_event_interruptible(cep->waitq, !cep->in_use); + wait_event_interruptible(cep->waitq, !cep->in_use); if (signal_pending(current)) flush_signals(current); goto retry; From 1573eebeaa8055777eb753f9b4d1cbe653380c38 Mon Sep 17 00:00:00 2001 From: Codrin Ciubotariu Date: Tue, 25 Jun 2019 12:10:02 +0300 Subject: [PATCH 021/277] clk: at91: generated: Truncate divisor to GENERATED_MAX_DIV + 1 In clk_generated_determine_rate(), if the divisor is greater than GENERATED_MAX_DIV + 1, then the wrong best_rate will be returned. If clk_generated_set_rate() will be called later with this wrong rate, it will return -EINVAL, so the generated clock won't change its value. Do no let the divisor be greater than GENERATED_MAX_DIV + 1. Fixes: 8c7aa6328947 ("clk: at91: clk-generated: remove useless divisor loop") Signed-off-by: Codrin Ciubotariu Acked-by: Nicolas Ferre Acked-by: Ludovic Desroches Signed-off-by: Stephen Boyd --- drivers/clk/at91/clk-generated.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/at91/clk-generated.c b/drivers/clk/at91/clk-generated.c index 44db83a6d01c..44a46dcc0518 100644 --- a/drivers/clk/at91/clk-generated.c +++ b/drivers/clk/at91/clk-generated.c @@ -141,6 +141,8 @@ static int clk_generated_determine_rate(struct clk_hw *hw, continue; div = DIV_ROUND_CLOSEST(parent_rate, req->rate); + if (div > GENERATED_MAX_DIV + 1) + div = GENERATED_MAX_DIV + 1; clk_generated_best_diff(req, parent, parent_rate, div, &best_diff, &best_rate); From 6ee82ef04e38b0d8b09b04bc1b068673deed6582 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Mon, 1 Jul 2019 13:46:51 +0200 Subject: [PATCH 022/277] clk: Add missing documentation of devm_clk_bulk_get_optional() argument Fix an incomplete devm_clk_bulk_get_optional() function documentation by adding description of the num_clks argument as in other *clk_bulk* functions. Fixes: 9bd5ef0bd874 ("clk: Add devm_clk_bulk_get_optional() function") Reported-by: kbuild test robot Signed-off-by: Sylwester Nawrocki Signed-off-by: Stephen Boyd --- include/linux/clk.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/clk.h b/include/linux/clk.h index 3c096c7a51dc..853a8f181394 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -359,6 +359,7 @@ int __must_check devm_clk_bulk_get(struct device *dev, int num_clks, /** * devm_clk_bulk_get_optional - managed get multiple optional consumer clocks * @dev: device for clock "consumer" + * @num_clks: the number of clk_bulk_data * @clks: pointer to the clk_bulk_data table of consumer * * Behaves the same as devm_clk_bulk_get() except where there is no clock From c93d059a80450af99dd6c0e8c36790579343675a Mon Sep 17 00:00:00 2001 From: Weiyi Lu Date: Fri, 28 Jun 2019 15:22:34 +0800 Subject: [PATCH 023/277] clk: mediatek: mt8183: Register 13MHz clock earlier for clocksource The 13MHz clock should be registered before clocksource driver is initialized. Use CLK_OF_DECLARE_DRIVER() to guarantee. Fixes: acddfc2c261b ("clk: mediatek: Add MT8183 clock support") Cc: Signed-off-by: Weiyi Lu Signed-off-by: Stephen Boyd --- drivers/clk/mediatek/clk-mt8183.c | 46 +++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/drivers/clk/mediatek/clk-mt8183.c b/drivers/clk/mediatek/clk-mt8183.c index 1aa5f4059251..73b7e238eee7 100644 --- a/drivers/clk/mediatek/clk-mt8183.c +++ b/drivers/clk/mediatek/clk-mt8183.c @@ -25,9 +25,11 @@ static const struct mtk_fixed_clk top_fixed_clks[] = { FIXED_CLK(CLK_TOP_UNIVP_192M, "univpll_192m", "univpll", 192000000), }; +static const struct mtk_fixed_factor top_early_divs[] = { + FACTOR(CLK_TOP_CLK13M, "clk13m", "clk26m", 1, 2), +}; + static const struct mtk_fixed_factor top_divs[] = { - FACTOR(CLK_TOP_CLK13M, "clk13m", "clk26m", 1, - 2), FACTOR(CLK_TOP_F26M_CK_D2, "csw_f26m_ck_d2", "clk26m", 1, 2), FACTOR(CLK_TOP_SYSPLL_CK, "syspll_ck", "mainpll", 1, @@ -1148,37 +1150,57 @@ static int clk_mt8183_apmixed_probe(struct platform_device *pdev) return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data); } +static struct clk_onecell_data *top_clk_data; + +static void clk_mt8183_top_init_early(struct device_node *node) +{ + int i; + + top_clk_data = mtk_alloc_clk_data(CLK_TOP_NR_CLK); + + for (i = 0; i < CLK_TOP_NR_CLK; i++) + top_clk_data->clks[i] = ERR_PTR(-EPROBE_DEFER); + + mtk_clk_register_factors(top_early_divs, ARRAY_SIZE(top_early_divs), + top_clk_data); + + of_clk_add_provider(node, of_clk_src_onecell_get, top_clk_data); +} + +CLK_OF_DECLARE_DRIVER(mt8183_topckgen, "mediatek,mt8183-topckgen", + clk_mt8183_top_init_early); + static int clk_mt8183_top_probe(struct platform_device *pdev) { struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0); void __iomem *base; - struct clk_onecell_data *clk_data; struct device_node *node = pdev->dev.of_node; base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(base)) return PTR_ERR(base); - clk_data = mtk_alloc_clk_data(CLK_TOP_NR_CLK); - mtk_clk_register_fixed_clks(top_fixed_clks, ARRAY_SIZE(top_fixed_clks), - clk_data); + top_clk_data); - mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); + mtk_clk_register_factors(top_early_divs, ARRAY_SIZE(top_early_divs), + top_clk_data); + + mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), top_clk_data); mtk_clk_register_muxes(top_muxes, ARRAY_SIZE(top_muxes), - node, &mt8183_clk_lock, clk_data); + node, &mt8183_clk_lock, top_clk_data); mtk_clk_register_composites(top_aud_muxes, ARRAY_SIZE(top_aud_muxes), - base, &mt8183_clk_lock, clk_data); + base, &mt8183_clk_lock, top_clk_data); mtk_clk_register_composites(top_aud_divs, ARRAY_SIZE(top_aud_divs), - base, &mt8183_clk_lock, clk_data); + base, &mt8183_clk_lock, top_clk_data); mtk_clk_register_gates(node, top_clks, ARRAY_SIZE(top_clks), - clk_data); + top_clk_data); - return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data); + return of_clk_add_provider(node, of_clk_src_onecell_get, top_clk_data); } static int clk_mt8183_infra_probe(struct platform_device *pdev) From c9a67cbb5189e966c70451562b2ca4c3876ab546 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Thu, 18 Jul 2019 13:36:16 +0800 Subject: [PATCH 024/277] clk: sprd: Select REGMAP_MMIO to avoid compile errors Make REGMAP_MMIO selected to avoid undefined reference to regmap symbols. Fixes: d41f59fd92f2 ("clk: sprd: Add common infrastructure") Signed-off-by: Chunyan Zhang Signed-off-by: Stephen Boyd --- drivers/clk/sprd/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/sprd/Kconfig b/drivers/clk/sprd/Kconfig index 91d3d721c801..3c219af25100 100644 --- a/drivers/clk/sprd/Kconfig +++ b/drivers/clk/sprd/Kconfig @@ -3,6 +3,7 @@ config SPRD_COMMON_CLK tristate "Clock support for Spreadtrum SoCs" depends on ARCH_SPRD || COMPILE_TEST default ARCH_SPRD + select REGMAP_MMIO if SPRD_COMMON_CLK From e1f1ae8002e4b06addc52443fcd975bbf554ae92 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 11 Jul 2019 15:03:59 +0200 Subject: [PATCH 025/277] clk: renesas: cpg-mssr: Fix reset control race condition The module reset code in the Renesas CPG/MSSR driver uses read-modify-write (RMW) operations to write to a Software Reset Register (SRCRn), and simple writes to write to a Software Reset Clearing Register (SRSTCLRn), as was mandated by the R-Car Gen2 and Gen3 Hardware User's Manuals. However, this may cause a race condition when two devices are reset in parallel: if the reset for device A completes in the middle of the RMW operation for device B, device A may be reset again, causing subtle failures (e.g. i2c timeouts): thread A thread B -------- -------- val = SRCRn val |= bit A SRCRn = val delay val = SRCRn (bit A is set) SRSTCLRn = bit A (bit A in SRCRn is cleared) val |= bit B SRCRn = val (bit A and B are set) This can be reproduced on e.g. Salvator-XS using: $ while true; do i2cdump -f -y 4 0x6A b > /dev/null; done & $ while true; do i2cdump -f -y 2 0x10 b > /dev/null; done & i2c-rcar e6510000.i2c: error -110 : 40000002 i2c-rcar e66d8000.i2c: error -110 : 40000002 According to the R-Car Gen3 Hardware Manual Errata for Rev. 0.80 of Feb 28, 2018, reflected in Rev. 1.00 of the R-Car Gen3 Hardware User's Manual, writes to SRCRn do not require read-modify-write cycles. Note that the R-Car Gen2 Hardware User's Manual has not been updated yet, and still says a read-modify-write sequence is required. According to the hardware team, the reset hardware block is the same on both R-Car Gen2 and Gen3, though. Hence fix the issue by replacing the read-modify-write operations on SRCRn by simple writes. Reported-by: Yao Lihua Fixes: 6197aa65c4905532 ("clk: renesas: cpg-mssr: Add support for reset control") Signed-off-by: Geert Uytterhoeven Tested-by: Linh Phung Signed-off-by: Stephen Boyd --- drivers/clk/renesas/renesas-cpg-mssr.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/clk/renesas/renesas-cpg-mssr.c b/drivers/clk/renesas/renesas-cpg-mssr.c index 52bbb9ce3807..d4075b130674 100644 --- a/drivers/clk/renesas/renesas-cpg-mssr.c +++ b/drivers/clk/renesas/renesas-cpg-mssr.c @@ -572,17 +572,11 @@ static int cpg_mssr_reset(struct reset_controller_dev *rcdev, unsigned int reg = id / 32; unsigned int bit = id % 32; u32 bitmask = BIT(bit); - unsigned long flags; - u32 value; dev_dbg(priv->dev, "reset %u%02u\n", reg, bit); /* Reset module */ - spin_lock_irqsave(&priv->rmw_lock, flags); - value = readl(priv->base + SRCR(reg)); - value |= bitmask; - writel(value, priv->base + SRCR(reg)); - spin_unlock_irqrestore(&priv->rmw_lock, flags); + writel(bitmask, priv->base + SRCR(reg)); /* Wait for at least one cycle of the RCLK clock (@ ca. 32 kHz) */ udelay(35); @@ -599,16 +593,10 @@ static int cpg_mssr_assert(struct reset_controller_dev *rcdev, unsigned long id) unsigned int reg = id / 32; unsigned int bit = id % 32; u32 bitmask = BIT(bit); - unsigned long flags; - u32 value; dev_dbg(priv->dev, "assert %u%02u\n", reg, bit); - spin_lock_irqsave(&priv->rmw_lock, flags); - value = readl(priv->base + SRCR(reg)); - value |= bitmask; - writel(value, priv->base + SRCR(reg)); - spin_unlock_irqrestore(&priv->rmw_lock, flags); + writel(bitmask, priv->base + SRCR(reg)); return 0; } From 36876b30d2143121322cd199cc23f27143912deb Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Tue, 23 Jul 2019 19:22:52 +0900 Subject: [PATCH 026/277] selftests: kmod: Fix typo in kmod.sh This patch fixes some spelling typos in kmod.sh Signed-off-by: Masanari Iida Acked-by: Randy Dunlap Signed-off-by: Shuah Khan --- tools/testing/selftests/kmod/kmod.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh index 0a76314b4414..8b944cf042f6 100755 --- a/tools/testing/selftests/kmod/kmod.sh +++ b/tools/testing/selftests/kmod/kmod.sh @@ -28,7 +28,7 @@ # override by exporting to your environment prior running this script. # For instance this script assumes you do not have xfs loaded upon boot. # If this is false, export DEFAULT_KMOD_FS="ext4" prior to running this -# script if the filesyste module you don't have loaded upon bootup +# script if the filesystem module you don't have loaded upon bootup # is ext4 instead. Refer to allow_user_defaults() for a list of user # override variables possible. # @@ -263,7 +263,7 @@ config_get_test_result() config_reset() { if ! echo -n "1" >"$DIR"/reset; then - echo "$0: reset shuld have worked" >&2 + echo "$0: reset should have worked" >&2 exit 1 fi } @@ -488,7 +488,7 @@ usage() echo Example uses: echo echo "${TEST_NAME}.sh -- executes all tests" - echo "${TEST_NAME}.sh -t 0008 -- Executes test ID 0008 number of times is recomended" + echo "${TEST_NAME}.sh -t 0008 -- Executes test ID 0008 number of times is recommended" echo "${TEST_NAME}.sh -w 0008 -- Watch test ID 0008 run until an error occurs" echo "${TEST_NAME}.sh -s 0008 -- Run test ID 0008 once" echo "${TEST_NAME}.sh -c 0008 3 -- Run test ID 0008 three times" From 399ea57a4c8bc5fdb71a024a6870b5767b2ef6d8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 1 Jul 2019 14:04:31 +0100 Subject: [PATCH 027/277] selftests/x86: fix spelling mistake "FAILT" -> "FAIL" There is an spelling mistake in an a test error message. Fix it. Acked-by: Andy Lutomirski Signed-off-by: Colin Ian King Signed-off-by: Shuah Khan --- tools/testing/selftests/x86/test_vsyscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c index 4602326b8f5b..a4f4d4cf22c3 100644 --- a/tools/testing/selftests/x86/test_vsyscall.c +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -451,7 +451,7 @@ static int test_vsys_x(void) printf("[OK]\tExecuting the vsyscall page failed: #PF(0x%lx)\n", segv_err); } else { - printf("[FAILT]\tExecution failed with the wrong error: #PF(0x%lx)\n", + printf("[FAIL]\tExecution failed with the wrong error: #PF(0x%lx)\n", segv_err); return 1; } From 66d7780f18eae0232827fcffeaded39a6a168236 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 Jul 2019 11:51:56 -0700 Subject: [PATCH 028/277] dma-mapping: check pfn validity in dma_common_{mmap,get_sgtable} Check that the pfn returned from arch_dma_coherent_to_pfn refers to a valid page and reject the mmap / get_sgtable requests otherwise. Based on the arm implementation of the mmap and get_sgtable methods. Signed-off-by: Christoph Hellwig Tested-by: Vignesh Raghavendra --- kernel/dma/mapping.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 1f628e7ac709..b945239621d8 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -116,11 +116,16 @@ int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, int ret; if (!dev_is_dma_coherent(dev)) { + unsigned long pfn; + if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN)) return -ENXIO; - page = pfn_to_page(arch_dma_coherent_to_pfn(dev, cpu_addr, - dma_addr)); + /* If the PFN is not valid, we do not have a struct page */ + pfn = arch_dma_coherent_to_pfn(dev, cpu_addr, dma_addr); + if (!pfn_valid(pfn)) + return -ENXIO; + page = pfn_to_page(pfn); } else { page = virt_to_page(cpu_addr); } @@ -170,7 +175,11 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, if (!dev_is_dma_coherent(dev)) { if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN)) return -ENXIO; + + /* If the PFN is not valid, we do not have a struct page */ pfn = arch_dma_coherent_to_pfn(dev, cpu_addr, dma_addr); + if (!pfn_valid(pfn)) + return -ENXIO; } else { pfn = page_to_pfn(virt_to_page(cpu_addr)); } From ad3c7b18c5b362be5dbd0f2c0bcf1fd5fd659315 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 23 Jul 2019 11:33:12 +0200 Subject: [PATCH 029/277] arm: use swiotlb for bounce buffering on LPAE configs The DMA API requires that 32-bit DMA masks are always supported, but on arm LPAE configs they do not currently work when memory is present above 4GB. Wire up the swiotlb code like for all other architectures to provide the bounce buffering in that case. Fixes: 21e07dba9fb11 ("scsi: reduce use of block bounce buffers"). Reported-by: Roger Quadros Signed-off-by: Christoph Hellwig Tested-by: Vignesh Raghavendra --- arch/arm/include/asm/dma-mapping.h | 4 +- arch/arm/mm/Kconfig | 5 +++ arch/arm/mm/dma-mapping.c | 61 ++++++++++++++++++++++++++++++ arch/arm/mm/init.c | 5 +++ 4 files changed, 74 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index 7e0486ad1318..dba9355e2484 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -18,7 +18,9 @@ extern const struct dma_map_ops arm_coherent_dma_ops; static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { - return IS_ENABLED(CONFIG_MMU) ? &arm_dma_ops : NULL; + if (IS_ENABLED(CONFIG_MMU) && !IS_ENABLED(CONFIG_ARM_LPAE)) + return &arm_dma_ops; + return NULL; } #ifdef __arch_page_to_dma diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 820b60a50125..c54cd7ed90ba 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -663,6 +663,11 @@ config ARM_LPAE depends on MMU && CPU_32v7 && !CPU_32v6 && !CPU_32v5 && \ !CPU_32v4 && !CPU_32v3 select PHYS_ADDR_T_64BIT + select SWIOTLB + select ARCH_HAS_DMA_COHERENT_TO_PFN + select ARCH_HAS_DMA_MMAP_PGPROT + select ARCH_HAS_SYNC_DMA_FOR_DEVICE + select ARCH_HAS_SYNC_DMA_FOR_CPU help Say Y if you have an ARMv7 processor supporting the LPAE page table format and you would like to access memory beyond the diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 4789c60a86e3..6774b03aa405 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -1125,6 +1126,19 @@ int arm_dma_supported(struct device *dev, u64 mask) static const struct dma_map_ops *arm_get_dma_map_ops(bool coherent) { + /* + * When CONFIG_ARM_LPAE is set, physical address can extend above + * 32-bits, which then can't be addressed by devices that only support + * 32-bit DMA. + * Use the generic dma-direct / swiotlb ops code in that case, as that + * handles bounce buffering for us. + * + * Note: this checks CONFIG_ARM_LPAE instead of CONFIG_SWIOTLB as the + * latter is also selected by the Xen code, but that code for now relies + * on non-NULL dev_dma_ops. To be cleaned up later. + */ + if (IS_ENABLED(CONFIG_ARM_LPAE)) + return NULL; return coherent ? &arm_coherent_dma_ops : &arm_dma_ops; } @@ -2329,6 +2343,9 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct dma_map_ops *dma_ops; dev->archdata.dma_coherent = coherent; +#ifdef CONFIG_SWIOTLB + dev->dma_coherent = coherent; +#endif /* * Don't override the dma_ops if they have already been set. Ideally @@ -2363,3 +2380,47 @@ void arch_teardown_dma_ops(struct device *dev) /* Let arch_setup_dma_ops() start again from scratch upon re-probe */ set_dma_ops(dev, NULL); } + +#ifdef CONFIG_SWIOTLB +void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) +{ + __dma_page_cpu_to_dev(phys_to_page(paddr), paddr & (PAGE_SIZE - 1), + size, dir); +} + +void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) +{ + __dma_page_dev_to_cpu(phys_to_page(paddr), paddr & (PAGE_SIZE - 1), + size, dir); +} + +long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, + dma_addr_t dma_addr) +{ + return dma_to_pfn(dev, dma_addr); +} + +pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, + unsigned long attrs) +{ + if (!dev_is_dma_coherent(dev)) + return __get_dma_pgprot(attrs, prot); + return prot; +} + +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, unsigned long attrs) +{ + return __dma_alloc(dev, size, dma_handle, gfp, + __get_dma_pgprot(attrs, PAGE_KERNEL), false, + attrs, __builtin_return_address(0)); +} + +void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_handle, unsigned long attrs) +{ + __arm_dma_free(dev, size, cpu_addr, dma_handle, attrs, false); +} +#endif /* CONFIG_SWIOTLB */ diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 4920a206dce9..16d373d587c4 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -463,6 +464,10 @@ static void __init free_highpages(void) */ void __init mem_init(void) { +#ifdef CONFIG_ARM_LPAE + swiotlb_init(1); +#endif + set_max_mapnr(pfn_to_page(max_pfn) - mem_map); /* this will put all unused low memory onto the freelists */ From 2e05ec4834f9bb7fe3575e4c48acebebfa1e24d9 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Wed, 24 Jul 2019 23:15:54 +0900 Subject: [PATCH 030/277] selftests: mlxsw: Fix typo in qos_mc_aware.sh This patch fixes some spelling typo in qos_mc_aware.sh Signed-off-by: Masanari Iida Acked-by: Randy Dunlap Reviewed-by: Ido Schimmel Signed-off-by: Shuah Khan --- tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh index 71231ad2dbfb..47315fe48d5a 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -262,7 +262,7 @@ test_mc_aware() stop_traffic - log_test "UC performace under MC overload" + log_test "UC performance under MC overload" echo "UC-only throughput $(humanize $ucth1)" echo "UC+MC throughput $(humanize $ucth2)" @@ -316,7 +316,7 @@ test_uc_aware() stop_traffic - log_test "MC performace under UC overload" + log_test "MC performance under UC overload" echo " ingress UC throughput $(humanize ${uc_ir})" echo " egress UC throughput $(humanize ${uc_er})" echo " sent $attempts BC ARPs, got $passes responses" From 6a053953739d23694474a5f9c81d1a30093da81a Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 23 Jul 2019 09:57:25 +0300 Subject: [PATCH 031/277] IB/mlx5: Fix unreg_umr to ignore the mkey state Fix unreg_umr to ignore the mkey state and do not fail if was freed. This prevents a case that a user space application already changed the mkey state to free and then the UMR operation will fail leaving the mkey in an inappropriate state. Link: https://lore.kernel.org/r/20190723065733.4899-3-leon@kernel.org Cc: # 3.19 Fixes: 968e78dd9644 ("IB/mlx5: Enhance UMR support to allow partial page table update") Signed-off-by: Yishai Hadas Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/mr.c | 4 ++-- drivers/infiniband/hw/mlx5/qp.c | 12 ++++++++---- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index c482f19958b3..f6a53455bf8b 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -481,6 +481,7 @@ struct mlx5_umr_wr { u64 length; int access_flags; u32 mkey; + u8 ignore_free_state:1; }; static inline const struct mlx5_umr_wr *umr_wr(const struct ib_send_wr *wr) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 20ece6e0b2fc..266edaf8029d 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1372,10 +1372,10 @@ static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) return 0; - umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR | - MLX5_IB_SEND_UMR_FAIL_IF_FREE; + umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR; umrwr.wr.opcode = MLX5_IB_WR_UMR; umrwr.mkey = mr->mmkey.key; + umrwr.ignore_free_state = 1; return mlx5_ib_post_send_wait(dev, &umrwr); } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 2a97619ed603..615cc6771516 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4295,10 +4295,14 @@ static int set_reg_umr_segment(struct mlx5_ib_dev *dev, memset(umr, 0, sizeof(*umr)); - if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE) - umr->flags = MLX5_UMR_CHECK_FREE; /* fail if free */ - else - umr->flags = MLX5_UMR_CHECK_NOT_FREE; /* fail if not free */ + if (!umrwr->ignore_free_state) { + if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE) + /* fail if free */ + umr->flags = MLX5_UMR_CHECK_FREE; + else + /* fail if not free */ + umr->flags = MLX5_UMR_CHECK_NOT_FREE; + } umr->xlt_octowords = cpu_to_be16(get_xlt_octo(umrwr->xlt_size)); if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) { From afd1417404fba6dbfa6c0a8e5763bd348da682e4 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 23 Jul 2019 09:57:26 +0300 Subject: [PATCH 032/277] IB/mlx5: Use direct mkey destroy command upon UMR unreg failure Use a direct firmware command to destroy the mkey in case the unreg UMR operation has failed. This prevents a case that a mkey will leak out from the cache post a failure to be destroyed by a UMR WR. In case the MR cache limit didn't reach a call to add another entry to the cache instead of the destroyed one is issued. In addition, replaced a warn message to WARN_ON() as this flow is fatal and can't happen unless some bug around. Link: https://lore.kernel.org/r/20190723065733.4899-4-leon@kernel.org Cc: # 4.10 Fixes: 49780d42dfc9 ("IB/mlx5: Expose MR cache for mlx5_ib") Signed-off-by: Yishai Hadas Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 266edaf8029d..b83361aebf28 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -545,14 +545,17 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) return; c = order2idx(dev, mr->order); - if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) { - mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c); + WARN_ON(c < 0 || c >= MAX_MR_CACHE_ENTRIES); + + if (unreg_umr(dev, mr)) { + mr->allocated_from_cache = false; + destroy_mkey(dev, mr); + ent = &cache->ent[c]; + if (ent->cur < ent->limit) + queue_work(cache->wq, &ent->work); return; } - if (unreg_umr(dev, mr)) - return; - ent = &cache->ent[c]; spin_lock_irq(&ent->lock); list_add_tail(&mr->list, &ent->head); From 9ec4483a3f0f71a228a5933bc040441322bfb090 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 23 Jul 2019 09:57:27 +0300 Subject: [PATCH 033/277] IB/mlx5: Move MRs to a kernel PD when freeing them to the MR cache Fix unreg_umr to move the MR to a kernel owned PD (i.e. the UMR PD) which can't be accessed by userspace. This ensures that nothing can continue to access the MR once it has been placed in the kernels cache for reuse. MRs in the cache continue to have their HW state, including DMA tables, present. Even though the MR has been invalidated, changing the PD provides an additional layer of protection against use of the MR. Link: https://lore.kernel.org/r/20190723065733.4899-5-leon@kernel.org Cc: # 3.10 Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Yishai Hadas Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index b83361aebf28..7274a9b9df58 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1375,8 +1375,10 @@ static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) return 0; - umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR; + umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR | + MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; umrwr.wr.opcode = MLX5_IB_WR_UMR; + umrwr.pd = dev->umrc.pd; umrwr.mkey = mr->mmkey.key; umrwr.ignore_free_state = 1; From b9332dad987018745a0c0bb718d12dacfa760489 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 23 Jul 2019 09:57:28 +0300 Subject: [PATCH 034/277] IB/mlx5: Fix clean_mr() to work in the expected order Any dma map underlying the MR should only be freed once the MR is fenced at the hardware. As of the above we first destroy the MKEY and just after that can safely call to dma_unmap_single(). Link: https://lore.kernel.org/r/20190723065733.4899-6-leon@kernel.org Cc: # 4.3 Fixes: 8a187ee52b04 ("IB/mlx5: Support the new memory registration API") Signed-off-by: Yishai Hadas Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 7274a9b9df58..2c77456f359f 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1582,10 +1582,10 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) mr->sig = NULL; } - mlx5_free_priv_descs(mr); - - if (!allocated_from_cache) + if (!allocated_from_cache) { destroy_mkey(dev, mr); + mlx5_free_priv_descs(mr); + } } static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) From 527d37e9e575bc0e9024de9b499385e7bb31f1ad Mon Sep 17 00:00:00 2001 From: Joe Lawrence Date: Wed, 24 Jul 2019 16:05:58 -0400 Subject: [PATCH 035/277] selftests/livepatch: add test skip handling Add a skip() message function that stops the test, logs an explanation, and sets the "skip" return code (4). Before loading a livepatch self-test kernel module, first verify that we've built and installed it by running a 'modprobe --dry-run'. This should catch a few environment issues, including !CONFIG_LIVEPATCH and !CONFIG_TEST_LIVEPATCH. In these cases, exit gracefully with the new skip() function. Reported-by: Jiri Benc Suggested-by: Shuah Khan Reviewed-by: Petr Mladek Signed-off-by: Joe Lawrence Signed-off-by: Shuah Khan --- .../testing/selftests/livepatch/functions.sh | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh index 30195449c63c..edcfeace4655 100644 --- a/tools/testing/selftests/livepatch/functions.sh +++ b/tools/testing/selftests/livepatch/functions.sh @@ -13,6 +13,14 @@ function log() { echo "$1" > /dev/kmsg } +# skip(msg) - testing can't proceed +# msg - explanation +function skip() { + log "SKIP: $1" + echo "SKIP: $1" >&2 + exit 4 +} + # die(msg) - game over, man # msg - dying words function die() { @@ -43,6 +51,12 @@ function loop_until() { done } +function assert_mod() { + local mod="$1" + + modprobe --dry-run "$mod" &>/dev/null +} + function is_livepatch_mod() { local mod="$1" @@ -75,6 +89,9 @@ function __load_mod() { function load_mod() { local mod="$1"; shift + assert_mod "$mod" || + skip "unable to load module ${mod}, verify CONFIG_TEST_LIVEPATCH=m and run self-tests as root" + is_livepatch_mod "$mod" && die "use load_lp() to load the livepatch module $mod" @@ -88,6 +105,9 @@ function load_mod() { function load_lp_nowait() { local mod="$1"; shift + assert_mod "$mod" || + skip "unable to load module ${mod}, verify CONFIG_TEST_LIVEPATCH=m and run self-tests as root" + is_livepatch_mod "$mod" || die "module $mod is not a livepatch" From 71be7b0e7d4069822c89146daed800686db8f147 Mon Sep 17 00:00:00 2001 From: Asmaa Mnebhi Date: Wed, 24 Jul 2019 15:32:57 -0400 Subject: [PATCH 036/277] Fix uninitialized variable in ipmb_dev_int.c ret at line 112 of ipmb_dev_int.c is uninitialized which results in a warning during build regressions. This warning was found by build regression/improvement testing for v5.3-rc1. Reported-by: build regression/improvement testing for v5.3-rc1. Fixes: 51bd6f291583 ("Add support for IPMB driver") Signed-off-by: Asmaa Mnebhi Message-Id: <571dbb67cf58411d567953d9fb3739eb4789238b.1563996586.git.Asmaa@mellanox.com> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmb_dev_int.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmb_dev_int.c b/drivers/char/ipmi/ipmb_dev_int.c index 57204335c5f5..285e0b8f9a97 100644 --- a/drivers/char/ipmi/ipmb_dev_int.c +++ b/drivers/char/ipmi/ipmb_dev_int.c @@ -76,7 +76,7 @@ static ssize_t ipmb_read(struct file *file, char __user *buf, size_t count, struct ipmb_dev *ipmb_dev = to_ipmb_dev(file); struct ipmb_request_elem *queue_elem; struct ipmb_msg msg; - ssize_t ret; + ssize_t ret = 0; memset(&msg, 0, sizeof(msg)); From e3cacb73e626d885b8cf24103fed0ae26518e3c4 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 24 Jul 2019 17:39:47 -0700 Subject: [PATCH 037/277] xtensa: fix build for cores with coprocessors Assembly entry/return abstraction change didn't add asmmacro.h include statement to coprocessor.S, resulting in references to undefined macros abi_entry and abi_ret on cores that define XTENSA_HAVE_COPROCESSORS. Fix that by including asm/asmmacro.h from the coprocessor.S. Signed-off-by: Max Filippov --- arch/xtensa/kernel/coprocessor.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S index 60c220020054..80828b95a51f 100644 --- a/arch/xtensa/kernel/coprocessor.S +++ b/arch/xtensa/kernel/coprocessor.S @@ -14,6 +14,7 @@ #include #include +#include #include #include #include From 296e3a2aad09d328f22e54655c3d736033fe1ae8 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 23 Jul 2019 09:57:30 +0300 Subject: [PATCH 038/277] IB/mlx5: Prevent concurrent MR updates during invalidation The device requires that memory registration work requests that update the address translation table of a MR will be fenced if posted together. This scenario can happen when address ranges are invalidated by the mmu in separate concurrent calls to the invalidation callback. We prefer to block concurrent address updates for a single MR over fencing since making the decision if a WQE needs fencing will be more expensive and fencing all WQEs is a too radical choice. Further, it isn't clear that this code can even run safely concurrently, so a lock is a safer choice. Fixes: b4cfe447d47b ("IB/mlx5: Implement on demand paging by adding support for MMU notifiers") Link: https://lore.kernel.org/r/20190723065733.4899-8-leon@kernel.org Signed-off-by: Moni Shoua Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 36ba901cc9a5..81da82050d05 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -246,7 +246,7 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, * overwrite the same MTTs. Concurent invalidations might race us, * but they will write 0s as well, so no difference in the end result. */ - + mutex_lock(&umem_odp->umem_mutex); for (addr = start; addr < end; addr += BIT(umem_odp->page_shift)) { idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift; /* @@ -278,6 +278,7 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, idx - blk_start_idx + 1, 0, MLX5_IB_UPD_XLT_ZAP | MLX5_IB_UPD_XLT_ATOMIC); + mutex_unlock(&umem_odp->umem_mutex); /* * We are now sure that the device will not access the * memory. We can safely unmap it, and mark it as dirty if From a379d1ce32f1c97b19d68cffbd195e025dbb9d43 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 23 Jul 2019 09:57:32 +0300 Subject: [PATCH 039/277] IB/core: Fix querying total rdma stats rdma_counter_init() may fail for a device. In such case while calculating total sum, ignore NULL hstats. This fixes below observed call trace. BUG: kernel NULL pointer dereference, address: 00000000000000a0 PGD 8000001009b30067 P4D 8000001009b30067 PUD 10549c9067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 55 PID: 20887 Comm: cat Kdump: loaded Not tainted 5.2.0-rc6-jdc+ #13 RIP: 0010:rdma_counter_get_hwstat_value+0xf2/0x150 [ib_core] Call Trace: show_hw_stats+0x5e/0x130 [ib_core] dev_attr_show+0x15/0x50 sysfs_kf_seq_show+0xc6/0x1a0 seq_read+0x132/0x370 vfs_read+0x89/0x140 ksys_read+0x5c/0xd0 do_syscall_64+0x5a/0x240 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: f34a55e497e8 ("RDMA/core: Get sum value of all counters when perform a sysfs stat read") Link: https://lore.kernel.org/r/20190723065733.4899-10-leon@kernel.org Signed-off-by: Parav Pandit Reviewed-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/counters.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 01faef7bc061..c7d445635476 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -393,6 +393,9 @@ u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u8 port, u32 index) u64 sum; port_counter = &dev->port_data[port].port_counter; + if (!port_counter->hstats) + return 0; + sum = get_running_counters_hwstat_sum(dev, port, index); sum += port_counter->hstats->value[index]; From d191152f43a5869d7dbb50dd2a7a4b3b8b71f1f0 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 23 Jul 2019 09:57:33 +0300 Subject: [PATCH 040/277] IB/counters: Always initialize the port counter object Port counter objects should be initialized even if alloc_stats is unsupported, otherwise QP bind operations in user space can trigger a NULL pointer deference if they try to bind QP on RDMA device which doesn't support counters. Fixes: f34a55e497e8 ("RDMA/core: Get sum value of all counters when perform a sysfs stat read") Link: https://lore.kernel.org/r/20190723065733.4899-11-leon@kernel.org Signed-off-by: Parav Pandit Reviewed-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/counters.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index c7d445635476..45d5164e9574 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -597,7 +597,7 @@ void rdma_counter_init(struct ib_device *dev) struct rdma_port_counter *port_counter; u32 port; - if (!dev->ops.alloc_hw_stats || !dev->port_data) + if (!dev->port_data) return; rdma_for_each_port(dev, port) { @@ -605,6 +605,9 @@ void rdma_counter_init(struct ib_device *dev) port_counter->mode.mode = RDMA_COUNTER_MODE_NONE; mutex_init(&port_counter->lock); + if (!dev->ops.alloc_hw_stats) + continue; + port_counter->hstats = dev->ops.alloc_hw_stats(dev, port); if (!port_counter->hstats) goto fail; @@ -627,9 +630,6 @@ void rdma_counter_release(struct ib_device *dev) struct rdma_port_counter *port_counter; u32 port; - if (!dev->ops.alloc_hw_stats) - return; - rdma_for_each_port(dev, port) { port_counter = &dev->port_data[port].port_counter; kfree(port_counter->hstats); From b7165bd0d6cbb93732559be6ea8774653b204480 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 23 Jul 2019 09:57:29 +0300 Subject: [PATCH 041/277] IB/mlx5: Fix RSS Toeplitz setup to be aligned with the HW specification The specification for the Toeplitz function doesn't require to set the key explicitly to be symmetric. In case a symmetric functionality is required a symmetric key can be simply used. Wrongly forcing the algorithm to symmetric causes the wrong packet distribution and a performance degradation. Link: https://lore.kernel.org/r/20190723065733.4899-7-leon@kernel.org Cc: # 4.7 Fixes: 28d6137008b2 ("IB/mlx5: Add RSS QP support") Signed-off-by: Yishai Hadas Reviewed-by: Alex Vainman Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 615cc6771516..379328b2598f 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1713,7 +1713,6 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, } MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ); - MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); memcpy(rss_key, ucmd.rx_hash_key, len); break; } From edbfe83def34153a05439ecb3352ae0bb65024de Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sat, 29 Jun 2019 11:41:36 +0200 Subject: [PATCH 042/277] platform/x86: pcengines-apuv2: Fix softdep statement Only first MODULE_SOFTDEP statement is handled per module. Multiple dependencies must be expressed in a single statement. Signed-off-by: Jean Delvare Cc: "Enrico Weigelt, metux IT consult" Cc: Darren Hart Cc: Andy Shevchenko [andy: massaged commit message] Signed-off-by: Andy Shevchenko --- drivers/platform/x86/pcengines-apuv2.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/platform/x86/pcengines-apuv2.c b/drivers/platform/x86/pcengines-apuv2.c index b0d3110ae378..96b499c6929a 100644 --- a/drivers/platform/x86/pcengines-apuv2.c +++ b/drivers/platform/x86/pcengines-apuv2.c @@ -255,6 +255,4 @@ MODULE_DESCRIPTION("PC Engines APUv2/APUv3 board GPIO/LED/keys driver"); MODULE_LICENSE("GPL"); MODULE_DEVICE_TABLE(dmi, apu_gpio_dmi_table); MODULE_ALIAS("platform:pcengines-apuv2"); -MODULE_SOFTDEP("pre: platform:" AMD_FCH_GPIO_DRIVER_NAME); -MODULE_SOFTDEP("pre: platform:leds-gpio"); -MODULE_SOFTDEP("pre: platform:gpio_keys_polled"); +MODULE_SOFTDEP("pre: platform:" AMD_FCH_GPIO_DRIVER_NAME " platform:leds-gpio platform:gpio_keys_polled"); From 8732d85a69a0411f16a4b78df8fdc7b09c50a849 Mon Sep 17 00:00:00 2001 From: Mattias Jacobsson <2pi@mok.nu> Date: Fri, 19 Jul 2019 19:51:45 +0200 Subject: [PATCH 043/277] platform/x86: wmi: add missing struct parameter description Add a description for the context parameter in the struct wmi_device_id. Reported-by: kbuild test robot Fixes: a48e23385fcf ("platform/x86: wmi: add context pointer field to struct wmi_device_id") Signed-off-by: Mattias Jacobsson <2pi@mok.nu> Signed-off-by: Andy Shevchenko --- include/linux/mod_devicetable.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index b2c1648f7e5d..5714fd35a83c 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -814,6 +814,7 @@ struct tee_client_device_id { /** * struct wmi_device_id - WMI device identifier * @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba + * @context: pointer to driver specific data */ struct wmi_device_id { const char guid_string[UUID_STRING_LEN+1]; From 6acf5d76ab685e921771abbbae5353929f3ebbe6 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Tue, 23 Jul 2019 11:20:22 +0200 Subject: [PATCH 044/277] Platform: OLPC: add SPI MODULE_DEVICE_TABLE The SPI bus creates a device with the modalias of "xo1.75-ec". This fixes XO-1.75 EC driver autoloading Fixes: 0c3d931b3ab9 ("Platform: OLPC: Add XO-1.75 EC driver") Signed-off-by: Lubomir Rintel Signed-off-by: Andy Shevchenko --- drivers/platform/olpc/olpc-xo175-ec.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/platform/olpc/olpc-xo175-ec.c b/drivers/platform/olpc/olpc-xo175-ec.c index 48d6f0d87583..83ed1fbf73cf 100644 --- a/drivers/platform/olpc/olpc-xo175-ec.c +++ b/drivers/platform/olpc/olpc-xo175-ec.c @@ -736,6 +736,12 @@ static const struct of_device_id olpc_xo175_ec_of_match[] = { }; MODULE_DEVICE_TABLE(of, olpc_xo175_ec_of_match); +static const struct spi_device_id olpc_xo175_ec_id_table[] = { + { "xo1.75-ec", 0 }, + {} +}; +MODULE_DEVICE_TABLE(spi, olpc_xo175_ec_id_table); + static struct spi_driver olpc_xo175_ec_spi_driver = { .driver = { .name = "olpc-xo175-ec", From 66013e8ec6850f9c62df6aea555fe7668e84dc3c Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Fri, 14 Jun 2019 13:39:40 +0530 Subject: [PATCH 045/277] platform/x86: intel_pmc_core: Add ICL-NNPI support to PMC Core Ice Lake Neural Network Processor for deep learning inference a.k.a. ICL-NNPI can re-use Ice Lake Mobile regmap to enable Intel PMC Core driver on it. Cc: Darren Hart Cc: Andy Shevchenko Cc: platform-driver-x86@vger.kernel.org Link: https://lkml.org/lkml/2019/6/5/1034 Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Andy Shevchenko --- drivers/platform/x86/intel_pmc_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/intel_pmc_core.c b/drivers/platform/x86/intel_pmc_core.c index 235c0b89f824..c510d0d72475 100644 --- a/drivers/platform/x86/intel_pmc_core.c +++ b/drivers/platform/x86/intel_pmc_core.c @@ -812,6 +812,7 @@ static const struct x86_cpu_id intel_pmc_core_ids[] = { INTEL_CPU_FAM6(KABYLAKE_DESKTOP, spt_reg_map), INTEL_CPU_FAM6(CANNONLAKE_MOBILE, cnp_reg_map), INTEL_CPU_FAM6(ICELAKE_MOBILE, icl_reg_map), + INTEL_CPU_FAM6(ICELAKE_NNPI, icl_reg_map), {} }; From 2bcbeaefde2f0384d6ad351c151b1a9fe7791a0a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 24 Jul 2019 08:52:52 +0200 Subject: [PATCH 046/277] mm/hmm: always return EBUSY for invalid ranges in hmm_range_{fault,snapshot} We should not have two different error codes for the same condition. EAGAIN must be reserved for the FAULT_FLAG_ALLOW_RETRY retry case and signals to the caller that the mmap_sem has been unlocked. Use EBUSY for the !valid case so that callers can get the locking right. Link: https://lore.kernel.org/r/20190724065258.16603-2-hch@lst.de Tested-by: Ralph Campbell Signed-off-by: Christoph Hellwig Reviewed-by: Ralph Campbell Reviewed-by: Jason Gunthorpe Reviewed-by: Felix Kuehling [jgg: elaborated commit message] Signed-off-by: Jason Gunthorpe --- Documentation/vm/hmm.rst | 2 +- mm/hmm.c | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst index 7d90964abbb0..710ce1c701bf 100644 --- a/Documentation/vm/hmm.rst +++ b/Documentation/vm/hmm.rst @@ -237,7 +237,7 @@ The usage pattern is:: ret = hmm_range_snapshot(&range); if (ret) { up_read(&mm->mmap_sem); - if (ret == -EAGAIN) { + if (ret == -EBUSY) { /* * No need to check hmm_range_wait_until_valid() return value * on retry we will get proper error with hmm_range_snapshot() diff --git a/mm/hmm.c b/mm/hmm.c index e1eedef129cf..16b6731a34db 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -946,7 +946,7 @@ EXPORT_SYMBOL(hmm_range_unregister); * @range: range * Return: -EINVAL if invalid argument, -ENOMEM out of memory, -EPERM invalid * permission (for instance asking for write and range is read only), - * -EAGAIN if you need to retry, -EFAULT invalid (ie either no valid + * -EBUSY if you need to retry, -EFAULT invalid (ie either no valid * vma or it is illegal to access that range), number of valid pages * in range->pfns[] (from range start address). * @@ -967,7 +967,7 @@ long hmm_range_snapshot(struct hmm_range *range) do { /* If range is no longer valid force retry. */ if (!range->valid) - return -EAGAIN; + return -EBUSY; vma = find_vma(hmm->mm, start); if (vma == NULL || (vma->vm_flags & device_vma)) @@ -1062,10 +1062,8 @@ long hmm_range_fault(struct hmm_range *range, bool block) do { /* If range is no longer valid force retry. */ - if (!range->valid) { - up_read(&hmm->mm->mmap_sem); - return -EAGAIN; - } + if (!range->valid) + return -EBUSY; vma = find_vma(hmm->mm, start); if (vma == NULL || (vma->vm_flags & device_vma)) From 02712bc3250849c1cf99d626aea98f610e695f34 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 24 Jul 2019 08:52:53 +0200 Subject: [PATCH 047/277] mm/hmm: move hmm_vma_range_done and hmm_vma_fault to nouveau These two functions are marked as a legacy APIs to get rid of, but seem to suit the current nouveau flow. Move it to the only user in preparation for fixing a locking bug involving caller and callee. All comments referring to the old API have been removed as this now is a driver private helper. Link: https://lore.kernel.org/r/20190724065258.16603-3-hch@lst.de Tested-by: Ralph Campbell Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/nouveau/nouveau_svm.c | 46 ++++++++++++++++++++++- include/linux/hmm.h | 54 --------------------------- 2 files changed, 44 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 8c92374afcf2..6c1b04de0db8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -475,6 +475,48 @@ nouveau_svm_fault_cache(struct nouveau_svm *svm, fault->inst, fault->addr, fault->access); } +static inline bool +nouveau_range_done(struct hmm_range *range) +{ + bool ret = hmm_range_valid(range); + + hmm_range_unregister(range); + return ret; +} + +static int +nouveau_range_fault(struct hmm_mirror *mirror, struct hmm_range *range, + bool block) +{ + long ret; + + range->default_flags = 0; + range->pfn_flags_mask = -1UL; + + ret = hmm_range_register(range, mirror, + range->start, range->end, + PAGE_SHIFT); + if (ret) + return (int)ret; + + if (!hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT)) { + up_read(&range->vma->vm_mm->mmap_sem); + return -EAGAIN; + } + + ret = hmm_range_fault(range, block); + if (ret <= 0) { + if (ret == -EBUSY || !ret) { + up_read(&range->vma->vm_mm->mmap_sem); + ret = -EBUSY; + } else if (ret == -EAGAIN) + ret = -EBUSY; + hmm_range_unregister(range); + return ret; + } + return 0; +} + static int nouveau_svm_fault(struct nvif_notify *notify) { @@ -649,10 +691,10 @@ nouveau_svm_fault(struct nvif_notify *notify) range.values = nouveau_svm_pfn_values; range.pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT; again: - ret = hmm_vma_fault(&svmm->mirror, &range, true); + ret = nouveau_range_fault(&svmm->mirror, &range, true); if (ret == 0) { mutex_lock(&svmm->mutex); - if (!hmm_vma_range_done(&range)) { + if (!nouveau_range_done(&range)) { mutex_unlock(&svmm->mutex); goto again; } diff --git a/include/linux/hmm.h b/include/linux/hmm.h index b8a08b2a10ca..7ef56dc18050 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -484,60 +484,6 @@ long hmm_range_dma_unmap(struct hmm_range *range, */ #define HMM_RANGE_DEFAULT_TIMEOUT 1000 -/* This is a temporary helper to avoid merge conflict between trees. */ -static inline bool hmm_vma_range_done(struct hmm_range *range) -{ - bool ret = hmm_range_valid(range); - - hmm_range_unregister(range); - return ret; -} - -/* This is a temporary helper to avoid merge conflict between trees. */ -static inline int hmm_vma_fault(struct hmm_mirror *mirror, - struct hmm_range *range, bool block) -{ - long ret; - - /* - * With the old API the driver must set each individual entries with - * the requested flags (valid, write, ...). So here we set the mask to - * keep intact the entries provided by the driver and zero out the - * default_flags. - */ - range->default_flags = 0; - range->pfn_flags_mask = -1UL; - - ret = hmm_range_register(range, mirror, - range->start, range->end, - PAGE_SHIFT); - if (ret) - return (int)ret; - - if (!hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT)) { - /* - * The mmap_sem was taken by driver we release it here and - * returns -EAGAIN which correspond to mmap_sem have been - * drop in the old API. - */ - up_read(&range->vma->vm_mm->mmap_sem); - return -EAGAIN; - } - - ret = hmm_range_fault(range, block); - if (ret <= 0) { - if (ret == -EBUSY || !ret) { - /* Same as above, drop mmap_sem to match old API. */ - up_read(&range->vma->vm_mm->mmap_sem); - ret = -EBUSY; - } else if (ret == -EAGAIN) - ret = -EBUSY; - hmm_range_unregister(range); - return ret; - } - return 0; -} - /* Below are for HMM internal use only! Not to be used by device driver! */ static inline void hmm_mm_init(struct mm_struct *mm) { From 5fbcf5015db8e9f04a9da6d40322622fa229da54 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 24 Jul 2019 08:52:54 +0200 Subject: [PATCH 048/277] nouveau: remove the block parameter to nouveau_range_fault The parameter is always false, so remove it as well as the -EAGAIN handling that can only happen for the non-blocking case. Link: https://lore.kernel.org/r/20190724065258.16603-4-hch@lst.de Tested-by: Ralph Campbell Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/nouveau/nouveau_svm.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 6c1b04de0db8..e3097492b4ad 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -485,8 +485,7 @@ nouveau_range_done(struct hmm_range *range) } static int -nouveau_range_fault(struct hmm_mirror *mirror, struct hmm_range *range, - bool block) +nouveau_range_fault(struct hmm_mirror *mirror, struct hmm_range *range) { long ret; @@ -504,13 +503,12 @@ nouveau_range_fault(struct hmm_mirror *mirror, struct hmm_range *range, return -EAGAIN; } - ret = hmm_range_fault(range, block); + ret = hmm_range_fault(range, true); if (ret <= 0) { if (ret == -EBUSY || !ret) { up_read(&range->vma->vm_mm->mmap_sem); ret = -EBUSY; - } else if (ret == -EAGAIN) - ret = -EBUSY; + } hmm_range_unregister(range); return ret; } @@ -691,7 +689,7 @@ nouveau_svm_fault(struct nvif_notify *notify) range.values = nouveau_svm_pfn_values; range.pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT; again: - ret = nouveau_range_fault(&svmm->mirror, &range, true); + ret = nouveau_range_fault(&svmm->mirror, &range); if (ret == 0) { mutex_lock(&svmm->mutex); if (!nouveau_range_done(&range)) { From de4ee728465f7c0c29241550e083139b2ce9159c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 24 Jul 2019 08:52:55 +0200 Subject: [PATCH 049/277] nouveau: unlock mmap_sem on all errors from nouveau_range_fault Currently nouveau_svm_fault expects nouveau_range_fault to never unlock mmap_sem, but the latter unlocks it for a random selection of error codes. Fix this up by always unlocking mmap_sem for non-zero return values in nouveau_range_fault, and only unlocking it in the caller for successful returns. Link: https://lore.kernel.org/r/20190724065258.16603-5-hch@lst.de Tested-by: Ralph Campbell Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/nouveau/nouveau_svm.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index e3097492b4ad..a835cebb6d90 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -495,8 +495,10 @@ nouveau_range_fault(struct hmm_mirror *mirror, struct hmm_range *range) ret = hmm_range_register(range, mirror, range->start, range->end, PAGE_SHIFT); - if (ret) + if (ret) { + up_read(&range->vma->vm_mm->mmap_sem); return (int)ret; + } if (!hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT)) { up_read(&range->vma->vm_mm->mmap_sem); @@ -505,10 +507,9 @@ nouveau_range_fault(struct hmm_mirror *mirror, struct hmm_range *range) ret = hmm_range_fault(range, true); if (ret <= 0) { - if (ret == -EBUSY || !ret) { - up_read(&range->vma->vm_mm->mmap_sem); + if (ret == 0) ret = -EBUSY; - } + up_read(&range->vma->vm_mm->mmap_sem); hmm_range_unregister(range); return ret; } @@ -706,8 +707,8 @@ again: NULL); svmm->vmm->vmm.object.client->super = false; mutex_unlock(&svmm->mutex); + up_read(&svmm->mm->mmap_sem); } - up_read(&svmm->mm->mmap_sem); /* Cancel any faults in the window whose pages didn't manage * to keep their valid bit, or stay writeable when required. From 09e088a4903bd0dd911b4f1732b250130cdaffed Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 24 Jul 2019 22:08:50 +0800 Subject: [PATCH 050/277] xen/pciback: remove set but not used variable 'old_state' Fixes gcc '-Wunused-but-set-variable' warning: drivers/xen/xen-pciback/conf_space_capability.c: In function pm_ctrl_write: drivers/xen/xen-pciback/conf_space_capability.c:119:25: warning: variable old_state set but not used [-Wunused-but-set-variable] It is never used so can be removed. Reported-by: Hulk Robot Signed-off-by: YueHaibing Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- drivers/xen/xen-pciback/conf_space_capability.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/xen/xen-pciback/conf_space_capability.c b/drivers/xen/xen-pciback/conf_space_capability.c index 73427d8e0116..e5694133ebe5 100644 --- a/drivers/xen/xen-pciback/conf_space_capability.c +++ b/drivers/xen/xen-pciback/conf_space_capability.c @@ -116,13 +116,12 @@ static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value, { int err; u16 old_value; - pci_power_t new_state, old_state; + pci_power_t new_state; err = pci_read_config_word(dev, offset, &old_value); if (err) goto out; - old_state = (pci_power_t)(old_value & PCI_PM_CTRL_STATE_MASK); new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK); new_value &= PM_OK_BITS; From 73f628ec9e6bcc45b77c53fe6d0c0ec55eaf82af Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 26 Jul 2019 07:49:29 -0400 Subject: [PATCH 051/277] vhost: disable metadata prefetch optimization This seems to cause guest and host memory corruption. Disable for now until we get a better handle on that. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 819296332913..42a8c2a13ab1 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -96,7 +96,7 @@ struct vhost_uaddr { }; #if defined(CONFIG_MMU_NOTIFIER) && ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 0 -#define VHOST_ARCH_CAN_ACCEL_UACCESS 1 +#define VHOST_ARCH_CAN_ACCEL_UACCESS 0 #else #define VHOST_ARCH_CAN_ACCEL_UACCESS 0 #endif From 52f8c8b32ea2f2044efcb4214c1857e29f421c5d Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Fri, 26 Jul 2019 13:28:26 +0200 Subject: [PATCH 052/277] irqchip/gic-v3: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When fall-through warnings was enabled by default the following warning was starting to show up: In file included from ../arch/arm64/include/asm/cputype.h:132, from ../arch/arm64/include/asm/cache.h:8, from ../include/linux/cache.h:6, from ../include/linux/printk.h:9, from ../include/linux/kernel.h:15, from ../include/linux/list.h:9, from ../include/linux/kobject.h:19, from ../include/linux/of.h:17, from ../include/linux/irqdomain.h:35, from ../include/linux/acpi.h:13, from ../drivers/irqchip/irq-gic-v3.c:9: ../drivers/irqchip/irq-gic-v3.c: In function ‘gic_cpu_sys_reg_init’: ../arch/arm64/include/asm/sysreg.h:853:2: warning: this statement may fall through [-Wimplicit-fallthrough=] asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); \ ^~~ ../arch/arm64/include/asm/arch_gicv3.h:20:29: note: in expansion of macro ‘write_sysreg_s’ #define write_gicreg(v, r) write_sysreg_s(v, SYS_ ## r) ^~~~~~~~~~~~~~ ../drivers/irqchip/irq-gic-v3.c:773:4: note: in expansion of macro ‘write_gicreg’ write_gicreg(0, ICC_AP0R2_EL1); ^~~~~~~~~~~~ ../drivers/irqchip/irq-gic-v3.c:774:3: note: here case 6: ^~~~ Rework so that the compiler doesn't warn about fall-through. Fixes: d93512ef0f0e ("Makefile: Globally enable fall-through warning") Signed-off-by: Anders Roxell Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 1282f81696b2..acd784c37090 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -775,8 +775,10 @@ static void gic_cpu_sys_reg_init(void) case 7: write_gicreg(0, ICC_AP0R3_EL1); write_gicreg(0, ICC_AP0R2_EL1); + /* Fall through */ case 6: write_gicreg(0, ICC_AP0R1_EL1); + /* Fall through */ case 5: case 4: write_gicreg(0, ICC_AP0R0_EL1); @@ -790,8 +792,10 @@ static void gic_cpu_sys_reg_init(void) case 7: write_gicreg(0, ICC_AP1R3_EL1); write_gicreg(0, ICC_AP1R2_EL1); + /* Fall through */ case 6: write_gicreg(0, ICC_AP1R1_EL1); + /* Fall through */ case 5: case 4: write_gicreg(0, ICC_AP1R0_EL1); From 34f8eb92ca053cbba2887bb7e4dbf2b2cd6eb733 Mon Sep 17 00:00:00 2001 From: Nianyao Tang Date: Fri, 26 Jul 2019 17:32:57 +0800 Subject: [PATCH 053/277] irqchip/gic-v3-its: Free unused vpt_page when alloc vpe table fail In its_vpe_init, when its_alloc_vpe_table fails, we should free vpt_page allocated just before, instead of vpe->vpt_page. Let's fix it. Cc: Thomas Gleixner Cc: Jason Cooper Cc: Marc Zyngier Signed-off-by: Nianyao Tang Signed-off-by: Shaokun Zhang Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index cfb9b4e5f914..4439ed881f98 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3008,7 +3008,7 @@ static int its_vpe_init(struct its_vpe *vpe) if (!its_alloc_vpe_table(vpe_id)) { its_vpe_id_free(vpe_id); - its_free_pending_table(vpe->vpt_page); + its_free_pending_table(vpt_page); return -ENOMEM; } From 321275f0d8f5939f2a98749fe03ee97ac97e73d0 Mon Sep 17 00:00:00 2001 From: Nishka Dasgupta Date: Tue, 23 Jul 2019 16:09:10 +0530 Subject: [PATCH 054/277] irqchip/irq-mbigen: Add of_node_put() before return Each iteration of for_each_child_of_node puts the previous node, but in the case of a return from the middle of the loop, there is no put, thus causing a memory leak. Add an of_node_put before the return in three places. Issue found with Coccinelle. Signed-off-by: Nishka Dasgupta Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-mbigen.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c index c0f65ea0ae0f..64f3574cc009 100644 --- a/drivers/irqchip/irq-mbigen.c +++ b/drivers/irqchip/irq-mbigen.c @@ -252,12 +252,15 @@ static int mbigen_of_create_domain(struct platform_device *pdev, parent = platform_bus_type.dev_root; child = of_platform_device_create(np, NULL, parent); - if (!child) + if (!child) { + of_node_put(np); return -ENOMEM; + } if (of_property_read_u32(child->dev.of_node, "num-pins", &num_pins) < 0) { dev_err(&pdev->dev, "No num-pins property\n"); + of_node_put(np); return -EINVAL; } @@ -265,8 +268,10 @@ static int mbigen_of_create_domain(struct platform_device *pdev, mbigen_write_msg, &mbigen_domain_ops, mgn_chip); - if (!domain) + if (!domain) { + of_node_put(np); return -ENOMEM; + } } return 0; From 9a446ef08f3bfc0c3deb9c6be840af2528ef8cf8 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 12 Jul 2019 15:29:05 +0200 Subject: [PATCH 055/277] irqchip/irq-imx-gpcv2: Forward irq type to parent The GPCv2 is a stacked IRQ controller below the ARM GIC. It doesn't care about the IRQ type itself, but needs to forward the type to the parent IRQ controller, so this one can be configured correctly. Signed-off-by: Lucas Stach Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-imx-gpcv2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/irq-imx-gpcv2.c b/drivers/irqchip/irq-imx-gpcv2.c index 66501ea4fd75..f869386eb4cf 100644 --- a/drivers/irqchip/irq-imx-gpcv2.c +++ b/drivers/irqchip/irq-imx-gpcv2.c @@ -134,6 +134,7 @@ static struct irq_chip gpcv2_irqchip_data_chip = { .irq_unmask = imx_gpcv2_irq_unmask, .irq_set_wake = imx_gpcv2_irq_set_wake, .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_set_type = irq_chip_set_type_parent, #ifdef CONFIG_SMP .irq_set_affinity = irq_chip_set_affinity_parent, #endif From b5fa9fc9e809f84bb20439730162eccfed906a76 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 8 Jul 2019 14:19:04 +0800 Subject: [PATCH 056/277] irqchip/renesas-rza1: Fix an use-after-free in rza1_irqc_probe() The gic_node is still being used in the rza1_irqc_parse_map() call after the of_node_put() call, which may result in use-after-free. Fixes: a644ccb819bc ("irqchip: Add Renesas RZ/A1 Interrupt Controller driver") Signed-off-by: Wen Yang Reviewed-by: Geert Uytterhoeven Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-renesas-rza1.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/irqchip/irq-renesas-rza1.c b/drivers/irqchip/irq-renesas-rza1.c index b1f19b210190..b0d46ac42b89 100644 --- a/drivers/irqchip/irq-renesas-rza1.c +++ b/drivers/irqchip/irq-renesas-rza1.c @@ -208,20 +208,19 @@ static int rza1_irqc_probe(struct platform_device *pdev) return PTR_ERR(priv->base); gic_node = of_irq_find_parent(np); - if (gic_node) { + if (gic_node) parent = irq_find_host(gic_node); - of_node_put(gic_node); - } if (!parent) { dev_err(dev, "cannot find parent domain\n"); - return -ENODEV; + ret = -ENODEV; + goto out_put_node; } ret = rza1_irqc_parse_map(priv, gic_node); if (ret) { dev_err(dev, "cannot parse %s: %d\n", "interrupt-map", ret); - return ret; + goto out_put_node; } priv->chip.name = "rza1-irqc", @@ -237,10 +236,12 @@ static int rza1_irqc_probe(struct platform_device *pdev) priv); if (!priv->irq_domain) { dev_err(dev, "cannot initialize irq domain\n"); - return -ENOMEM; + ret = -ENOMEM; } - return 0; +out_put_node: + of_node_put(gic_node); + return ret; } static int rza1_irqc_remove(struct platform_device *pdev) From 820571af721990e354649368e641313f85a29976 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 26 Jul 2019 12:31:28 -0300 Subject: [PATCH 057/277] tools include UAPI: Sync x86's syscalls_64.tbl and generic unistd.h to pick up clone3 and pidfd_open MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 05a70a8ec287 ("unistd: protect clone3 via __ARCH_WANT_SYS_CLONE3") 8f3220a80654 ("arch: wire-up clone3() syscall") 7615d9e1780e ("arch: wire-up pidfd_open()") Silencing the following tools/perf build warnings Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/unistd.h' differs from latest version at 'include/uapi/asm-generic/unistd.h' diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl' diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl Now 'perf trace -e pidfd*,clone*' will trace those syscalls as well as the others with those prefixes. $ diff -u /tmp/build/perf/arch/x86/include/generated/asm/syscalls_64.c.before /tmp/build/perf/arch/x86/include/generated/asm/syscalls_64.c --- /tmp/build/perf/arch/x86/include/generated/asm/syscalls_64.c.before 2019-07-26 12:24:55.020944201 -0300 +++ /tmp/build/perf/arch/x86/include/generated/asm/syscalls_64.c 2019-07-26 12:25:03.919047217 -0300 @@ -344,5 +344,7 @@ [431] = "fsconfig", [432] = "fsmount", [433] = "fspick", + [434] = "pidfd_open", + [435] = "clone3", }; -#define SYSCALLTBL_x86_64_MAX_ID 433 +#define SYSCALLTBL_x86_64_MAX_ID 435 $ Cc: Adrian Hunter Cc: Brendan Gregg Cc: Christian Brauner Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-0isnnqxtr1ihz6p8wzjiy47d@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm-generic/unistd.h | 8 +++++++- tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index a87904daf103..1be0e798e362 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -844,9 +844,15 @@ __SYSCALL(__NR_fsconfig, sys_fsconfig) __SYSCALL(__NR_fsmount, sys_fsmount) #define __NR_fspick 433 __SYSCALL(__NR_fspick, sys_fspick) +#define __NR_pidfd_open 434 +__SYSCALL(__NR_pidfd_open, sys_pidfd_open) +#ifdef __ARCH_WANT_SYS_CLONE3 +#define __NR_clone3 435 +__SYSCALL(__NR_clone3, sys_clone3) +#endif #undef __NR_syscalls -#define __NR_syscalls 434 +#define __NR_syscalls 436 /* * 32 bit systems traditionally used different diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index b4e6f9e6204a..c29976eca4a8 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -355,6 +355,8 @@ 431 common fsconfig __x64_sys_fsconfig 432 common fsmount __x64_sys_fsmount 433 common fspick __x64_sys_fspick +434 common pidfd_open __x64_sys_pidfd_open +435 common clone3 __x64_sys_clone3/ptregs # # x32-specific system call numbers start at 512 to avoid cache impact From e0d99c4d24fd8861da724b88ebd18a9fae8a2260 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 26 Jul 2019 12:43:23 -0300 Subject: [PATCH 058/277] tools headers UAPI: Update tools's copy of kvm.h headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Picking the changes from: 66bb8a065f5a ("KVM: x86: PMU Event Filter") f087a02941fe ("KVM: nVMX: Stash L1's CR3 in vmcs01.GUEST_CR3 on nested entry w/o EPT") 99adb567632b ("KVM: arm/arm64: Add save/restore support for firmware workaround state") Silencing this perf build warning: Warning: Kernel ABI header at 'tools/arch/arm/include/uapi/asm/kvm.h' differs from latest version at 'arch/arm/include/uapi/asm/kvm.h' diff -u tools/arch/arm/include/uapi/asm/kvm.h arch/arm/include/uapi/asm/kvm.h Warning: Kernel ABI header at 'tools/arch/arm64/include/uapi/asm/kvm.h' differs from latest version at 'arch/arm64/include/uapi/asm/kvm.h' diff -u tools/arch/arm64/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/vmx.h' differs from latest version at 'arch/x86/include/uapi/asm/vmx.h' diff -u tools/arch/x86/include/uapi/asm/vmx.h arch/x86/include/uapi/asm/vmx.h Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/kvm.h' differs from latest version at 'arch/x86/include/uapi/asm/kvm.h' diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Now 'perf trace' and other code that might use the tools/perf/trace/beauty autogenerated tables will be able to translate this new ioctl code into a string: $ tools/perf/trace/beauty/kvm_ioctl.sh > before $ $ cp include/uapi/linux/kvm.h tools/include/uapi/linux/kvm.h $ tools/perf/trace/beauty/kvm_ioctl.sh > after $ diff -u before after --- before 2019-07-26 12:32:47.959220236 -0300 +++ after 2019-07-26 12:33:05.766464871 -0300 @@ -79,6 +79,7 @@ [0xac] = "SET_ONE_REG", [0xad] = "KVMCLOCK_CTRL", [0xb0] = "GET_REG_LIST", + [0xb2] = "SET_PMU_EVENT_FILTER", [0xb7] = "SMI", [0xba] = "MEMORY_ENCRYPT_OP", [0xbb] = "MEMORY_ENCRYPT_REG_REGION", $ Cc: Adrian Hunter Cc: Andre Przywara Cc: Brendan Gregg Cc: Eric Hankland Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Marc Zyngier Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Sean Christopherson Link: https://lkml.kernel.org/n/tip-py1gcmt6rboehlwg6zvagfg2@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm/include/uapi/asm/kvm.h | 12 ++++++++++++ tools/arch/arm64/include/uapi/asm/kvm.h | 10 ++++++++++ tools/arch/x86/include/uapi/asm/kvm.h | 22 ++++++++++++++++++---- tools/arch/x86/include/uapi/asm/vmx.h | 1 - tools/include/uapi/linux/kvm.h | 3 +++ 5 files changed, 43 insertions(+), 5 deletions(-) diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h index 4602464ebdfb..a4217c1a5d01 100644 --- a/tools/arch/arm/include/uapi/asm/kvm.h +++ b/tools/arch/arm/include/uapi/asm/kvm.h @@ -214,6 +214,18 @@ struct kvm_vcpu_events { #define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM | KVM_REG_SIZE_U64 | \ KVM_REG_ARM_FW | ((r) & 0xffff)) #define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 KVM_REG_ARM_FW_REG(1) + /* Higher values mean better protection. */ +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL 0 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL 1 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED 2 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 KVM_REG_ARM_FW_REG(2) + /* Higher values mean better protection. */ +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL 0 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN 1 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL 2 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4) /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index d819a3e8b552..9a507716ae2f 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -229,6 +229,16 @@ struct kvm_vcpu_events { #define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ KVM_REG_ARM_FW | ((r) & 0xffff)) #define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 KVM_REG_ARM_FW_REG(1) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL 0 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL 1 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED 2 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 KVM_REG_ARM_FW_REG(2) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL 0 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN 1 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL 2 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4) /* SVE registers */ #define KVM_REG_ARM64_SVE (0x15 << KVM_REG_ARM_COPROC_SHIFT) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index d6ab5b4d15e5..503d3f42da16 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -378,10 +378,11 @@ struct kvm_sync_regs { struct kvm_vcpu_events events; }; -#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) -#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) -#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) -#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3) +#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) +#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) +#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) +#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3) +#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4) #define KVM_STATE_NESTED_FORMAT_VMX 0 #define KVM_STATE_NESTED_FORMAT_SVM 1 /* unused */ @@ -432,4 +433,17 @@ struct kvm_nested_state { } data; }; +/* for KVM_CAP_PMU_EVENT_FILTER */ +struct kvm_pmu_event_filter { + __u32 action; + __u32 nevents; + __u32 fixed_counter_bitmap; + __u32 flags; + __u32 pad[4]; + __u64 events[0]; +}; + +#define KVM_PMU_EVENT_ALLOW 0 +#define KVM_PMU_EVENT_DENY 1 + #endif /* _ASM_X86_KVM_H */ diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index d213ec5c3766..f0b0c90dd398 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -146,7 +146,6 @@ #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2 -#define VMX_ABORT_VMCS_CORRUPTED 3 #define VMX_ABORT_LOAD_HOST_MSR_FAIL 4 #endif /* _UAPIVMX_H */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index c2152f3dd02d..a7c19540ce21 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -995,6 +995,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_SVE 170 #define KVM_CAP_ARM_PTRAUTH_ADDRESS 171 #define KVM_CAP_ARM_PTRAUTH_GENERIC 172 +#define KVM_CAP_PMU_EVENT_FILTER 173 #ifdef KVM_CAP_IRQ_ROUTING @@ -1329,6 +1330,8 @@ struct kvm_s390_ucas_mapping { #define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info) /* Available with KVM_CAP_PPC_GET_CPU_CHAR */ #define KVM_PPC_GET_CPU_CHAR _IOR(KVMIO, 0xb1, struct kvm_ppc_cpu_char) +/* Available with KVM_CAP_PMU_EVENT_FILTER */ +#define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) From 543b8c468f55f27f3c0178a22a91a51aabbbc428 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 17 Jul 2019 18:31:53 -0700 Subject: [PATCH 059/277] f2fs: fix to read source block before invalidating it f2fs_allocate_data_block() invalidates old block address and enable new block address. Then, if we try to read old block by f2fs_submit_page_bio(), it will give WARN due to reading invalid blocks. Let's make the order sanely back. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 70 +++++++++++++++++++++++++--------------------------- 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 6691f526fa40..8974672db78f 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -796,6 +796,29 @@ static int move_data_block(struct inode *inode, block_t bidx, if (lfs_mode) down_write(&fio.sbi->io_order_lock); + mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi), + fio.old_blkaddr, false); + if (!mpage) + goto up_out; + + fio.encrypted_page = mpage; + + /* read source block in mpage */ + if (!PageUptodate(mpage)) { + err = f2fs_submit_page_bio(&fio); + if (err) { + f2fs_put_page(mpage, 1); + goto up_out; + } + lock_page(mpage); + if (unlikely(mpage->mapping != META_MAPPING(fio.sbi) || + !PageUptodate(mpage))) { + err = -EIO; + f2fs_put_page(mpage, 1); + goto up_out; + } + } + f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, &sum, CURSEG_COLD_DATA, NULL, false); @@ -803,44 +826,18 @@ static int move_data_block(struct inode *inode, block_t bidx, newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); if (!fio.encrypted_page) { err = -ENOMEM; + f2fs_put_page(mpage, 1); goto recover_block; } - mpage = f2fs_pagecache_get_page(META_MAPPING(fio.sbi), - fio.old_blkaddr, FGP_LOCK, GFP_NOFS); - if (mpage) { - bool updated = false; - - if (PageUptodate(mpage)) { - memcpy(page_address(fio.encrypted_page), - page_address(mpage), PAGE_SIZE); - updated = true; - } - f2fs_put_page(mpage, 1); - invalidate_mapping_pages(META_MAPPING(fio.sbi), - fio.old_blkaddr, fio.old_blkaddr); - if (updated) - goto write_page; - } - - err = f2fs_submit_page_bio(&fio); - if (err) - goto put_page_out; - - /* write page */ - lock_page(fio.encrypted_page); - - if (unlikely(fio.encrypted_page->mapping != META_MAPPING(fio.sbi))) { - err = -EIO; - goto put_page_out; - } - if (unlikely(!PageUptodate(fio.encrypted_page))) { - err = -EIO; - goto put_page_out; - } - -write_page: + /* write target block */ f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true, true); + memcpy(page_address(fio.encrypted_page), + page_address(mpage), PAGE_SIZE); + f2fs_put_page(mpage, 1); + invalidate_mapping_pages(META_MAPPING(fio.sbi), + fio.old_blkaddr, fio.old_blkaddr); + set_page_dirty(fio.encrypted_page); if (clear_page_dirty_for_io(fio.encrypted_page)) dec_page_count(fio.sbi, F2FS_DIRTY_META); @@ -871,11 +868,12 @@ write_page: put_page_out: f2fs_put_page(fio.encrypted_page, 1); recover_block: - if (lfs_mode) - up_write(&fio.sbi->io_order_lock); if (err) f2fs_do_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr, true, true); +up_out: + if (lfs_mode) + up_write(&fio.sbi->io_order_lock); put_out: f2fs_put_dnode(&dn); out: From 74bf71ed792ab0f64631cc65ccdb54c356c36d45 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Fri, 26 Jul 2019 23:47:02 +0200 Subject: [PATCH 060/277] ALSA: hda: Fix 1-minute detection delay when i915 module is not available Distribution installation images such as Debian include different sets of modules which can be downloaded dynamically. Such images may notably include the hda sound modules but not the i915 DRM module, even if the latter was enabled at build time, as reported on https://bugs.debian.org/931507 In such a case hdac_i915 would be linked in and try to load the i915 module, fail since it is not there, but still wait for a whole minute before giving up binding with it. This fixes such as case by only waiting for the binding if the module was properly loaded (or module support is disabled, in which case i915 is already compiled-in anyway). Fixes: f9b54e1961c7 ("ALSA: hda/i915: Allow delayed i915 audio component binding") Signed-off-by: Samuel Thibault Cc: Signed-off-by: Takashi Iwai --- sound/hda/hdac_i915.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c index 1192c7561d62..3c2db3816029 100644 --- a/sound/hda/hdac_i915.c +++ b/sound/hda/hdac_i915.c @@ -136,10 +136,12 @@ int snd_hdac_i915_init(struct hdac_bus *bus) if (!acomp) return -ENODEV; if (!acomp->ops) { - request_module("i915"); - /* 60s timeout */ - wait_for_completion_timeout(&bind_complete, - msecs_to_jiffies(60 * 1000)); + if (!IS_ENABLED(CONFIG_MODULES) || + !request_module("i915")) { + /* 60s timeout */ + wait_for_completion_timeout(&bind_complete, + msecs_to_jiffies(60 * 1000)); + } } if (!acomp->ops) { dev_info(bus->dev, "couldn't bind with audio component\n"); From ffe0bbabb0cffceceae07484fde1ec2a63b1537c Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 8 Jul 2019 10:23:43 +0200 Subject: [PATCH 061/277] gpio: don't WARN() on NULL descs if gpiolib is disabled If gpiolib is disabled, we use the inline stubs from gpio/consumer.h instead of regular definitions of GPIO API. The stubs for 'optional' variants of gpiod_get routines return NULL in this case as if the relevant GPIO wasn't found. This is correct so far. Calling other (non-gpio_get) stubs from this header triggers a warning because the GPIO descriptor couldn't have been requested. The warning however is unconditional (WARN_ON(1)) and is emitted even if the passed descriptor pointer is NULL. We don't want to force the users of 'optional' gpio_get to check the returned pointer before calling e.g. gpiod_set_value() so let's only WARN on non-NULL descriptors. Cc: stable@vger.kernel.org Reported-by: Claus H. Stovgaard Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/consumer.h | 64 +++++++++++++++++------------------ 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 9ddcf50a3c59..a7f08fb0f865 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -247,7 +247,7 @@ static inline void gpiod_put(struct gpio_desc *desc) might_sleep(); /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); } static inline void devm_gpiod_unhinge(struct device *dev, @@ -256,7 +256,7 @@ static inline void devm_gpiod_unhinge(struct device *dev, might_sleep(); /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); } static inline void gpiod_put_array(struct gpio_descs *descs) @@ -264,7 +264,7 @@ static inline void gpiod_put_array(struct gpio_descs *descs) might_sleep(); /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(descs); } static inline struct gpio_desc *__must_check @@ -317,7 +317,7 @@ static inline void devm_gpiod_put(struct device *dev, struct gpio_desc *desc) might_sleep(); /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); } static inline void devm_gpiod_put_array(struct device *dev, @@ -326,32 +326,32 @@ static inline void devm_gpiod_put_array(struct device *dev, might_sleep(); /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(descs); } static inline int gpiod_get_direction(const struct gpio_desc *desc) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return -ENOSYS; } static inline int gpiod_direction_input(struct gpio_desc *desc) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return -ENOSYS; } static inline int gpiod_direction_output(struct gpio_desc *desc, int value) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return -ENOSYS; } static inline int gpiod_direction_output_raw(struct gpio_desc *desc, int value) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return -ENOSYS; } @@ -359,7 +359,7 @@ static inline int gpiod_direction_output_raw(struct gpio_desc *desc, int value) static inline int gpiod_get_value(const struct gpio_desc *desc) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return 0; } static inline int gpiod_get_array_value(unsigned int array_size, @@ -368,13 +368,13 @@ static inline int gpiod_get_array_value(unsigned int array_size, unsigned long *value_bitmap) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc_array); return 0; } static inline void gpiod_set_value(struct gpio_desc *desc, int value) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); } static inline int gpiod_set_array_value(unsigned int array_size, struct gpio_desc **desc_array, @@ -382,13 +382,13 @@ static inline int gpiod_set_array_value(unsigned int array_size, unsigned long *value_bitmap) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc_array); return 0; } static inline int gpiod_get_raw_value(const struct gpio_desc *desc) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return 0; } static inline int gpiod_get_raw_array_value(unsigned int array_size, @@ -397,13 +397,13 @@ static inline int gpiod_get_raw_array_value(unsigned int array_size, unsigned long *value_bitmap) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc_array); return 0; } static inline void gpiod_set_raw_value(struct gpio_desc *desc, int value) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); } static inline int gpiod_set_raw_array_value(unsigned int array_size, struct gpio_desc **desc_array, @@ -411,14 +411,14 @@ static inline int gpiod_set_raw_array_value(unsigned int array_size, unsigned long *value_bitmap) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc_array); return 0; } static inline int gpiod_get_value_cansleep(const struct gpio_desc *desc) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return 0; } static inline int gpiod_get_array_value_cansleep(unsigned int array_size, @@ -427,13 +427,13 @@ static inline int gpiod_get_array_value_cansleep(unsigned int array_size, unsigned long *value_bitmap) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc_array); return 0; } static inline void gpiod_set_value_cansleep(struct gpio_desc *desc, int value) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); } static inline int gpiod_set_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, @@ -441,13 +441,13 @@ static inline int gpiod_set_array_value_cansleep(unsigned int array_size, unsigned long *value_bitmap) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc_array); return 0; } static inline int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return 0; } static inline int gpiod_get_raw_array_value_cansleep(unsigned int array_size, @@ -456,14 +456,14 @@ static inline int gpiod_get_raw_array_value_cansleep(unsigned int array_size, unsigned long *value_bitmap) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc_array); return 0; } static inline void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); } static inline int gpiod_set_raw_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, @@ -471,41 +471,41 @@ static inline int gpiod_set_raw_array_value_cansleep(unsigned int array_size, unsigned long *value_bitmap) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc_array); return 0; } static inline int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return -ENOSYS; } static inline int gpiod_set_transitory(struct gpio_desc *desc, bool transitory) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return -ENOSYS; } static inline int gpiod_is_active_low(const struct gpio_desc *desc) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return 0; } static inline int gpiod_cansleep(const struct gpio_desc *desc) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return 0; } static inline int gpiod_to_irq(const struct gpio_desc *desc) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return -EINVAL; } @@ -513,7 +513,7 @@ static inline int gpiod_set_consumer_name(struct gpio_desc *desc, const char *name) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return -EINVAL; } @@ -525,7 +525,7 @@ static inline struct gpio_desc *gpio_to_desc(unsigned gpio) static inline int desc_to_gpio(const struct gpio_desc *desc) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return -EINVAL; } From 25e5ef302c24a6fead369c0cfe88c073d7b97ca8 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 28 Jul 2019 18:41:38 +0200 Subject: [PATCH 062/277] eeprom: at24: make spd world-readable again The integration of the at24 driver into the nvmem framework broke the world-readability of spd EEPROMs. Fix it. Signed-off-by: Jean Delvare Cc: stable@vger.kernel.org Fixes: 57d155506dd5 ("eeprom: at24: extend driver to plug into the NVMEM framework") Cc: Andrew Lunn Cc: Srinivas Kandagatla Cc: Greg Kroah-Hartman Cc: Bartosz Golaszewski Cc: Arnd Bergmann Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 35bf2477693d..518945b2f737 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -685,7 +685,7 @@ static int at24_probe(struct i2c_client *client) nvmem_config.name = dev_name(dev); nvmem_config.dev = dev; nvmem_config.read_only = !writable; - nvmem_config.root_only = true; + nvmem_config.root_only = !(flags & AT24_FLAG_IRUGO); nvmem_config.owner = THIS_MODULE; nvmem_config.compat = true; nvmem_config.base_dev = dev; From d95da993383c78f7efd25957ba3af23af4b1c613 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Mon, 8 Jul 2019 08:35:58 +1200 Subject: [PATCH 063/277] gpiolib: Preserve desc->flags when setting state desc->flags may already have values set by of_gpiochip_add() so make sure that this isn't undone when setting the initial direction. Cc: stable@vger.kernel.org Fixes: 3edfb7bd76bd1cba ("gpiolib: Show correct direction from the beginning") Signed-off-by: Chris Packham Link: https://lore.kernel.org/r/20190707203558.10993-1-chris.packham@alliedtelesis.co.nz Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index bf05c29b53be..f497003f119c 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1394,12 +1394,17 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, for (i = 0; i < chip->ngpio; i++) { struct gpio_desc *desc = &gdev->descs[i]; - if (chip->get_direction && gpiochip_line_is_valid(chip, i)) - desc->flags = !chip->get_direction(chip, i) ? - (1 << FLAG_IS_OUT) : 0; - else - desc->flags = !chip->direction_input ? - (1 << FLAG_IS_OUT) : 0; + if (chip->get_direction && gpiochip_line_is_valid(chip, i)) { + if (!chip->get_direction(chip, i)) + set_bit(FLAG_IS_OUT, &desc->flags); + else + clear_bit(FLAG_IS_OUT, &desc->flags); + } else { + if (!chip->direction_input) + set_bit(FLAG_IS_OUT, &desc->flags); + else + clear_bit(FLAG_IS_OUT, &desc->flags); + } } acpi_gpiochip_add(chip); From cee3536d24a1d5db66b9f68c3ece0af128187ab4 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 22 Jul 2019 22:26:56 +1000 Subject: [PATCH 064/277] powerpc: Wire up clone3 syscall Wire up the new clone3 syscall added in commit 7f192e3cd316 ("fork: add clone3"). This requires a ppc_clone3 wrapper, in order to save the non-volatile GPRs before calling into the generic syscall code. Otherwise we hit the BUG_ON in CHECK_FULL_REGS in copy_thread(). Lightly tested using Christian's test code on a Power8 LE VM. Signed-off-by: Michael Ellerman Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20190724140259.23554-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/unistd.h | 1 + arch/powerpc/kernel/entry_32.S | 8 ++++++++ arch/powerpc/kernel/entry_64.S | 5 +++++ arch/powerpc/kernel/syscalls/syscall.tbl | 2 +- 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 68473c3c471c..b0720c7c3fcf 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -49,6 +49,7 @@ #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_UNISTD_H_ */ diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 85fdb6d879f1..54fab22c9a43 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -597,6 +597,14 @@ ppc_clone: stw r0,_TRAP(r1) /* register set saved */ b sys_clone + .globl ppc_clone3 +ppc_clone3: + SAVE_NVGPRS(r1) + lwz r0,_TRAP(r1) + rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ + stw r0,_TRAP(r1) /* register set saved */ + b sys_clone3 + .globl ppc_swapcontext ppc_swapcontext: SAVE_NVGPRS(r1) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index d9105fcf4021..0a0b5310f54a 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -487,6 +487,11 @@ _GLOBAL(ppc_clone) bl sys_clone b .Lsyscall_exit +_GLOBAL(ppc_clone3) + bl save_nvgprs + bl sys_clone3 + b .Lsyscall_exit + _GLOBAL(ppc32_swapcontext) bl save_nvgprs bl compat_sys_swapcontext diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 3331749aab20..43f736ed47f2 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -516,4 +516,4 @@ 432 common fsmount sys_fsmount 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open -# 435 reserved for clone3 +435 nospu clone3 ppc_clone3 From 622445541b751ce70a952b8486ce4512a792c0a2 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 25 Jul 2019 17:05:13 +0900 Subject: [PATCH 065/277] kbuild: detect missing "WITH Linux-syscall-note" for uapi headers UAPI headers licensed under GPL are supposed to have exception "WITH Linux-syscall-note" so that they can be included into non-GPL user space application code. Unfortunately, people often miss to add it. Break 'make headers' when any of exported headers lacks the exception note so that the 0-day bot can easily catch it. Signed-off-by: Masahiro Yamada --- scripts/headers_install.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/headers_install.sh b/scripts/headers_install.sh index 47f6f3ea0771..bbaf29386995 100755 --- a/scripts/headers_install.sh +++ b/scripts/headers_install.sh @@ -23,6 +23,12 @@ TMPFILE=$OUTFILE.tmp trap 'rm -f $OUTFILE $TMPFILE' EXIT +# SPDX-License-Identifier with GPL variants must have "WITH Linux-syscall-note" +if [ -n "$(sed -n -e "/SPDX-License-Identifier:.*GPL-/{/WITH Linux-syscall-note/!p}" $INFILE)" ]; then + echo "error: $INFILE: missing \"WITH Linux-syscall-note\" for SPDX-License-Identifier" >&2 + exit 1 +fi + sed -E -e ' s/([[:space:](])(__user|__force|__iomem)[[:space:]]/\1/g s/__attribute_const__([[:space:]]|$)/\1/g From 2e616d9f9ce8d469db4cd0a019cdc2ff3feab577 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 28 Jul 2019 21:12:32 -0700 Subject: [PATCH 066/277] xfs: fix stack contents leakage in the v1 inumber ioctls Explicitly initialize the onstack structures to zero so we don't leak kernel memory into userspace when converting the in-core inumbers structure to the v1 inogrp ioctl structure. Add a comment about why we have to use memset to ensure that the padding holes in the structures are set to zero. Fixes: 5f19c7fc6873351 ("xfs: introduce v5 inode group structure") Reported-by: Dan Carpenter Signed-off-by: Darrick J. Wong Reviewed-by: Eric Sandeen --- fs/xfs/xfs_itable.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index a8a06bb78ea8..f5c955d35be4 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -272,6 +272,7 @@ xfs_bulkstat_to_bstat( struct xfs_bstat *bs1, const struct xfs_bulkstat *bstat) { + /* memset is needed here because of padding holes in the structure. */ memset(bs1, 0, sizeof(struct xfs_bstat)); bs1->bs_ino = bstat->bs_ino; bs1->bs_mode = bstat->bs_mode; @@ -388,6 +389,8 @@ xfs_inumbers_to_inogrp( struct xfs_inogrp *ig1, const struct xfs_inumbers *ig) { + /* memset is needed here because of padding holes in the structure. */ + memset(ig1, 0, sizeof(struct xfs_inogrp)); ig1->xi_startino = ig->xi_startino; ig1->xi_alloccount = ig->xi_alloccount; ig1->xi_allocmask = ig->xi_allocmask; From 38fb6d0ea34299d97b031ed64fe994158b6f8eb3 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Thu, 25 Jul 2019 11:08:52 +0800 Subject: [PATCH 067/277] f2fs: use EINVAL for superblock with invalid magic The kernel mount_block_root() function expects -EACESS or -EINVAL for a unmountable filesystem when trying to mount the root with different filesystem types. However, in 5.3-rc1 the behavior when F2FS code cannot find valid block changed to return -EFSCORRUPTED(-EUCLEAN), and this error code makes mount_block_root() fail when trying to probe F2FS. When the magic number of the superblock mismatches, it has a high probability that it's just not a F2FS. In this case return -EINVAL seems to be a better result, and this return value can make mount_block_root() probing work again. Return -EINVAL when the superblock has magic mismatch, -EFSCORRUPTED in other cases (the magic matches but the superblock cannot be recognized). Fixes: 10f966bbf521 ("f2fs: use generic EFSBADCRC/EFSCORRUPTED") Signed-off-by: Icenowy Zheng Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index d95a681ef7c9..1838dd852a50 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2422,6 +2422,12 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, size_t crc_offset = 0; __u32 crc = 0; + if (le32_to_cpu(raw_super->magic) != F2FS_SUPER_MAGIC) { + f2fs_info(sbi, "Magic Mismatch, valid(0x%x) - read(0x%x)", + F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic)); + return -EINVAL; + } + /* Check checksum_offset and crc in superblock */ if (__F2FS_HAS_FEATURE(raw_super, F2FS_FEATURE_SB_CHKSUM)) { crc_offset = le32_to_cpu(raw_super->checksum_offset); @@ -2429,26 +2435,20 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, offsetof(struct f2fs_super_block, crc)) { f2fs_info(sbi, "Invalid SB checksum offset: %zu", crc_offset); - return 1; + return -EFSCORRUPTED; } crc = le32_to_cpu(raw_super->crc); if (!f2fs_crc_valid(sbi, crc, raw_super, crc_offset)) { f2fs_info(sbi, "Invalid SB checksum value: %u", crc); - return 1; + return -EFSCORRUPTED; } } - if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) { - f2fs_info(sbi, "Magic Mismatch, valid(0x%x) - read(0x%x)", - F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic)); - return 1; - } - /* Currently, support only 4KB page cache size */ if (F2FS_BLKSIZE != PAGE_SIZE) { f2fs_info(sbi, "Invalid page_cache_size (%lu), supports only 4KB", PAGE_SIZE); - return 1; + return -EFSCORRUPTED; } /* Currently, support only 4KB block size */ @@ -2456,14 +2456,14 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, if (blocksize != F2FS_BLKSIZE) { f2fs_info(sbi, "Invalid blocksize (%u), supports only 4KB", blocksize); - return 1; + return -EFSCORRUPTED; } /* check log blocks per segment */ if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) { f2fs_info(sbi, "Invalid log blocks per segment (%u)", le32_to_cpu(raw_super->log_blocks_per_seg)); - return 1; + return -EFSCORRUPTED; } /* Currently, support 512/1024/2048/4096 bytes sector size */ @@ -2473,7 +2473,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, F2FS_MIN_LOG_SECTOR_SIZE) { f2fs_info(sbi, "Invalid log sectorsize (%u)", le32_to_cpu(raw_super->log_sectorsize)); - return 1; + return -EFSCORRUPTED; } if (le32_to_cpu(raw_super->log_sectors_per_block) + le32_to_cpu(raw_super->log_sectorsize) != @@ -2481,7 +2481,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, f2fs_info(sbi, "Invalid log sectors per block(%u) log sectorsize(%u)", le32_to_cpu(raw_super->log_sectors_per_block), le32_to_cpu(raw_super->log_sectorsize)); - return 1; + return -EFSCORRUPTED; } segment_count = le32_to_cpu(raw_super->segment_count); @@ -2495,7 +2495,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, if (segment_count > F2FS_MAX_SEGMENT || segment_count < F2FS_MIN_SEGMENTS) { f2fs_info(sbi, "Invalid segment count (%u)", segment_count); - return 1; + return -EFSCORRUPTED; } if (total_sections > segment_count || @@ -2503,25 +2503,25 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, segs_per_sec > segment_count || !segs_per_sec) { f2fs_info(sbi, "Invalid segment/section count (%u, %u x %u)", segment_count, total_sections, segs_per_sec); - return 1; + return -EFSCORRUPTED; } if ((segment_count / segs_per_sec) < total_sections) { f2fs_info(sbi, "Small segment_count (%u < %u * %u)", segment_count, segs_per_sec, total_sections); - return 1; + return -EFSCORRUPTED; } if (segment_count > (le64_to_cpu(raw_super->block_count) >> 9)) { f2fs_info(sbi, "Wrong segment_count / block_count (%u > %llu)", segment_count, le64_to_cpu(raw_super->block_count)); - return 1; + return -EFSCORRUPTED; } if (secs_per_zone > total_sections || !secs_per_zone) { f2fs_info(sbi, "Wrong secs_per_zone / total_sections (%u, %u)", secs_per_zone, total_sections); - return 1; + return -EFSCORRUPTED; } if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION || raw_super->hot_ext_count > F2FS_MAX_EXTENSION || @@ -2531,7 +2531,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, le32_to_cpu(raw_super->extension_count), raw_super->hot_ext_count, F2FS_MAX_EXTENSION); - return 1; + return -EFSCORRUPTED; } if (le32_to_cpu(raw_super->cp_payload) > @@ -2539,7 +2539,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, f2fs_info(sbi, "Insane cp_payload (%u > %u)", le32_to_cpu(raw_super->cp_payload), blocks_per_seg - F2FS_CP_PACKS); - return 1; + return -EFSCORRUPTED; } /* check reserved ino info */ @@ -2550,12 +2550,12 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, le32_to_cpu(raw_super->node_ino), le32_to_cpu(raw_super->meta_ino), le32_to_cpu(raw_super->root_ino)); - return 1; + return -EFSCORRUPTED; } /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */ if (sanity_check_area_boundary(sbi, bh)) - return 1; + return -EFSCORRUPTED; return 0; } @@ -2872,10 +2872,10 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, } /* sanity checking of raw super */ - if (sanity_check_raw_super(sbi, bh)) { + err = sanity_check_raw_super(sbi, bh); + if (err) { f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock", block + 1); - err = -EFSCORRUPTED; brelse(bh); continue; } From c6622a425acd1d2f3a443cd39b490a8777b622d7 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 26 Jul 2019 12:34:32 -0700 Subject: [PATCH 068/277] dma-contiguous: do not overwrite align in dma_alloc_contiguous() The dma_alloc_contiguous() limits align at CONFIG_CMA_ALIGNMENT for cma_alloc() however it does not restore it for the fallback routine. This will result in a size mismatch between the allocation and free when running into the fallback routines after cma_alloc() fails, if the align is larger than CONFIG_CMA_ALIGNMENT. This patch adds a cma_align to take care of cma_alloc() and prevent the align from being overwritten. Fixes: fdaeec198ada ("dma-contiguous: add dma_{alloc,free}_contiguous() helpers") Reported-by: Dafna Hirschfeld Signed-off-by: Nicolin Chen Signed-off-by: Christoph Hellwig --- kernel/dma/contiguous.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index bfc0c17f2a3d..ea8259f53eda 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -243,8 +243,9 @@ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) /* CMA can be used only in the context which permits sleeping */ if (cma && gfpflags_allow_blocking(gfp)) { - align = min_t(size_t, align, CONFIG_CMA_ALIGNMENT); - page = cma_alloc(cma, count, align, gfp & __GFP_NOWARN); + size_t cma_align = min_t(size_t, align, CONFIG_CMA_ALIGNMENT); + + page = cma_alloc(cma, count, cma_align, gfp & __GFP_NOWARN); } /* Fallback allocation of normal pages */ From f46cc0152501e46d1b3aa5e7eade61145070eab0 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 26 Jul 2019 12:34:33 -0700 Subject: [PATCH 069/277] dma-contiguous: page-align the size in dma_free_contiguous() According to the original dma_direct_alloc_pages() code: { unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; if (!dma_release_from_contiguous(dev, page, count)) __free_pages(page, get_order(size)); } The count parameter for dma_release_from_contiguous() was page aligned before the right-shifting operation, while the new API dma_free_contiguous() forgets to have PAGE_ALIGN() at the size. So this patch simply adds it to prevent any corner case. Fixes: fdaeec198ada ("dma-contiguous: add dma_{alloc,free}_contiguous() helpers") Signed-off-by: Nicolin Chen Reviewed-by: Christoph Hellwig Signed-off-by: Christoph Hellwig --- kernel/dma/contiguous.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index ea8259f53eda..2bd410f934b3 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -267,7 +267,8 @@ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) */ void dma_free_contiguous(struct device *dev, struct page *page, size_t size) { - if (!cma_release(dev_get_cma_area(dev), page, size >> PAGE_SHIFT)) + if (!cma_release(dev_get_cma_area(dev), page, + PAGE_ALIGN(size) >> PAGE_SHIFT)) __free_pages(page, get_order(size)); } From 849adec41203ac5837c40c2d7e08490ffdef3c2c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 29 Jul 2019 11:06:17 +0100 Subject: [PATCH 070/277] arm64: compat: Allow single-byte watchpoints on all addresses Commit d968d2b801d8 ("ARM: 7497/1: hw_breakpoint: allow single-byte watchpoints on all addresses") changed the validation requirements for hardware watchpoints on arch/arm/. Update our compat layer to implement the same relaxation. Cc: Signed-off-by: Will Deacon --- arch/arm64/kernel/hw_breakpoint.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index dceb84520948..67b3bae50b92 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -536,13 +536,14 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, /* Aligned */ break; case 1: - /* Allow single byte watchpoint. */ - if (hw->ctrl.len == ARM_BREAKPOINT_LEN_1) - break; case 2: /* Allow halfword watchpoints and breakpoints. */ if (hw->ctrl.len == ARM_BREAKPOINT_LEN_2) break; + case 3: + /* Allow single byte watchpoint. */ + if (hw->ctrl.len == ARM_BREAKPOINT_LEN_1) + break; default: return -EINVAL; } From 0d7fd70f26039bd4b33444ca47f0e69ce3ae0354 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 29 Jul 2019 11:43:48 +0100 Subject: [PATCH 071/277] drivers/perf: arm_pmu: Fix failure path in PM notifier Handling of the CPU_PM_ENTER_FAILED transition in the Arm PMU PM notifier code incorrectly skips restoration of the counters. Fix the logic so that CPU_PM_ENTER_FAILED follows the same path as CPU_PM_EXIT. Cc: Fixes: da4e4f18afe0f372 ("drivers/perf: arm_pmu: implement CPU_PM notifier") Reported-by: Anders Roxell Acked-by: Lorenzo Pieralisi Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 2d06b8095a19..df352b334ea7 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -723,8 +723,8 @@ static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd, cpu_pm_pmu_setup(armpmu, cmd); break; case CPU_PM_EXIT: - cpu_pm_pmu_setup(armpmu, cmd); case CPU_PM_ENTER_FAILED: + cpu_pm_pmu_setup(armpmu, cmd); armpmu->start(armpmu); break; default: From 75a382f1c9e5092db10ad1e2111633d61b65e578 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 29 Jul 2019 11:39:45 +0100 Subject: [PATCH 072/277] arm64: hw_breakpoint: Fix warnings about implicit fallthrough Now that -Wimplicit-fallthrough is passed to GCC by default, the kernel build has suddenly got noisy. Annotate the two fall-through cases in our hw_breakpoint implementation, since they are both intentional. Reported-by: Anders Roxell Signed-off-by: Will Deacon --- arch/arm64/kernel/hw_breakpoint.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 67b3bae50b92..38ee1514cd9c 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -540,10 +540,14 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, /* Allow halfword watchpoints and breakpoints. */ if (hw->ctrl.len == ARM_BREAKPOINT_LEN_2) break; + + /* Fallthrough */ case 3: /* Allow single byte watchpoint. */ if (hw->ctrl.len == ARM_BREAKPOINT_LEN_1) break; + + /* Fallthrough */ default: return -EINVAL; } From 6655473920129eb2dd1dded147722316294a699a Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Fri, 26 Jul 2019 13:27:25 +0200 Subject: [PATCH 073/277] arm64: smp: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When fall-through warnings was enabled by default the following warning was starting to show up: In file included from ../include/linux/kernel.h:15, from ../include/linux/list.h:9, from ../include/linux/kobject.h:19, from ../include/linux/of.h:17, from ../include/linux/irqdomain.h:35, from ../include/linux/acpi.h:13, from ../arch/arm64/kernel/smp.c:9: ../arch/arm64/kernel/smp.c: In function ‘__cpu_up’: ../include/linux/printk.h:302:2: warning: this statement may fall through [-Wimplicit-fallthrough=] printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../arch/arm64/kernel/smp.c:156:4: note: in expansion of macro ‘pr_crit’ pr_crit("CPU%u: may not have shut down cleanly\n", cpu); ^~~~~~~ ../arch/arm64/kernel/smp.c:157:3: note: here case CPU_STUCK_IN_KERNEL: ^~~~ Rework so that the compiler doesn't warn about fall-through. Fixes: d93512ef0f0e ("Makefile: Globally enable fall-through warning") Signed-off-by: Anders Roxell Signed-off-by: Will Deacon --- arch/arm64/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index ea90d3bd9253..018a33e01b0e 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -152,8 +152,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) pr_crit("CPU%u: died during early boot\n", cpu); break; } - /* Fall through */ pr_crit("CPU%u: may not have shut down cleanly\n", cpu); + /* Fall through */ case CPU_STUCK_IN_KERNEL: pr_crit("CPU%u: is stuck in kernel\n", cpu); if (status & CPU_STUCK_REASON_52_BIT_VA) From eca92a53a6ab9f27f1b61dcb3e16ebef75f0d5bb Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Fri, 26 Jul 2019 13:27:21 +0200 Subject: [PATCH 074/277] arm64: module: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When fall-through warnings was enabled by default the following warnings was starting to show up: ../arch/arm64/kernel/module.c: In function ‘apply_relocate_add’: ../arch/arm64/kernel/module.c:316:19: warning: this statement may fall through [-Wimplicit-fallthrough=] overflow_check = false; ~~~~~~~~~~~~~~~^~~~~~~ ../arch/arm64/kernel/module.c:317:3: note: here case R_AARCH64_MOVW_UABS_G0: ^~~~ ../arch/arm64/kernel/module.c:322:19: warning: this statement may fall through [-Wimplicit-fallthrough=] overflow_check = false; ~~~~~~~~~~~~~~~^~~~~~~ ../arch/arm64/kernel/module.c:323:3: note: here case R_AARCH64_MOVW_UABS_G1: ^~~~ Rework so that the compiler doesn't warn about fall-through. Fixes: d93512ef0f0e ("Makefile: Globally enable fall-through warning") Signed-off-by: Anders Roxell Signed-off-by: Will Deacon --- arch/arm64/kernel/module.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 46e643e30708..03ff15bffbb6 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -314,18 +314,21 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, /* MOVW instruction relocations. */ case R_AARCH64_MOVW_UABS_G0_NC: overflow_check = false; + /* Fall through */ case R_AARCH64_MOVW_UABS_G0: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_UABS_G1_NC: overflow_check = false; + /* Fall through */ case R_AARCH64_MOVW_UABS_G1: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_UABS_G2_NC: overflow_check = false; + /* Fall through */ case R_AARCH64_MOVW_UABS_G2: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, AARCH64_INSN_IMM_MOVKZ); @@ -393,6 +396,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, break; case R_AARCH64_ADR_PREL_PG_HI21_NC: overflow_check = false; + /* Fall through */ case R_AARCH64_ADR_PREL_PG_HI21: ovf = reloc_insn_adrp(me, sechdrs, loc, val); if (ovf && ovf != -ERANGE) From ac65bdfef14a902b40ff69a35f5c604dba096547 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 19 Jun 2019 18:01:35 +0100 Subject: [PATCH 075/277] drm/i915: Keep rings pinned while the context is active Remember to keep the rings pinned as well as the context image until the GPU is no longer active. v2: Introduce a ring->pin_count primarily to hide the mock_ring that doesn't fit into the normal GGTT vma picture. v3: Order is important in teardown, ringbuffer submission needs to drop the pin count on the engine->kernel_context before it can gleefully free its ring. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110946 Fixes: ce476c80b8bf ("drm/i915: Keep contexts pinned until after the next kernel context switch") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190619170135.15281-1-chris@chris-wilson.co.uk (cherry picked from commit 09c5ab384f6fb30f834a5777888b4486dd7f015d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_context.c | 27 +++++++++++------ drivers/gpu/drm/i915/gt/intel_engine_types.h | 12 ++++++++ drivers/gpu/drm/i915/gt/intel_lrc.c | 10 ++----- drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 31 +++++++++++++------- drivers/gpu/drm/i915/gt/mock_engine.c | 1 + 5 files changed, 53 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 2c454f227c2e..23120901c55f 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -126,6 +126,7 @@ static void intel_context_retire(struct i915_active *active) if (ce->state) __context_unpin_state(ce->state); + intel_ring_unpin(ce->ring); intel_context_put(ce); } @@ -160,27 +161,35 @@ int intel_context_active_acquire(struct intel_context *ce, unsigned long flags) intel_context_get(ce); + err = intel_ring_pin(ce->ring); + if (err) + goto err_put; + if (!ce->state) return 0; err = __context_pin_state(ce->state, flags); - if (err) { - i915_active_cancel(&ce->active); - intel_context_put(ce); - return err; - } + if (err) + goto err_ring; /* Preallocate tracking nodes */ if (!i915_gem_context_is_kernel(ce->gem_context)) { err = i915_active_acquire_preallocate_barrier(&ce->active, ce->engine); - if (err) { - i915_active_release(&ce->active); - return err; - } + if (err) + goto err_state; } return 0; + +err_state: + __context_unpin_state(ce->state); +err_ring: + intel_ring_unpin(ce->ring); +err_put: + intel_context_put(ce); + i915_active_cancel(&ce->active); + return err; } void intel_context_active_release(struct intel_context *ce) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 868b220214f8..43e975a26016 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -70,6 +70,18 @@ struct intel_ring { struct list_head request_list; struct list_head active_link; + /* + * As we have two types of rings, one global to the engine used + * by ringbuffer submission and those that are exclusive to a + * context used by execlists, we have to play safe and allow + * atomic updates to the pin_count. However, the actual pinning + * of the context is either done during initialisation for + * ringbuffer submission or serialised as part of the context + * pinning for execlists, and so we do not need a mutex ourselves + * to serialise intel_ring_pin/intel_ring_unpin. + */ + atomic_t pin_count; + u32 head; u32 tail; u32 emit; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index b42b5f158295..82b7ace62d97 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1414,6 +1414,7 @@ static void execlists_context_destroy(struct kref *kref) { struct intel_context *ce = container_of(kref, typeof(*ce), ref); + GEM_BUG_ON(!i915_active_is_idle(&ce->active)); GEM_BUG_ON(intel_context_is_pinned(ce)); if (ce->state) @@ -1426,7 +1427,6 @@ static void execlists_context_unpin(struct intel_context *ce) { i915_gem_context_unpin_hw_id(ce->gem_context); i915_gem_object_unpin_map(ce->state->obj); - intel_ring_unpin(ce->ring); } static void @@ -1478,13 +1478,9 @@ __execlists_context_pin(struct intel_context *ce, goto unpin_active; } - ret = intel_ring_pin(ce->ring); - if (ret) - goto unpin_map; - ret = i915_gem_context_pin_hw_id(ce->gem_context); if (ret) - goto unpin_ring; + goto unpin_map; ce->lrc_desc = lrc_descriptor(ce, engine); ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; @@ -1492,8 +1488,6 @@ __execlists_context_pin(struct intel_context *ce, return 0; -unpin_ring: - intel_ring_unpin(ce->ring); unpin_map: i915_gem_object_unpin_map(ce->state->obj); unpin_active: diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index c6023bc9452d..12010e798868 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1149,16 +1149,16 @@ i915_emit_bb_start(struct i915_request *rq, int intel_ring_pin(struct intel_ring *ring) { struct i915_vma *vma = ring->vma; - enum i915_map_type map = i915_coherent_map_type(vma->vm->i915); unsigned int flags; void *addr; int ret; - GEM_BUG_ON(ring->vaddr); + if (atomic_fetch_inc(&ring->pin_count)) + return 0; ret = i915_timeline_pin(ring->timeline); if (ret) - return ret; + goto err_unpin; flags = PIN_GLOBAL; @@ -1172,26 +1172,31 @@ int intel_ring_pin(struct intel_ring *ring) ret = i915_vma_pin(vma, 0, 0, flags); if (unlikely(ret)) - goto unpin_timeline; + goto err_timeline; if (i915_vma_is_map_and_fenceable(vma)) addr = (void __force *)i915_vma_pin_iomap(vma); else - addr = i915_gem_object_pin_map(vma->obj, map); + addr = i915_gem_object_pin_map(vma->obj, + i915_coherent_map_type(vma->vm->i915)); if (IS_ERR(addr)) { ret = PTR_ERR(addr); - goto unpin_ring; + goto err_ring; } vma->obj->pin_global++; + GEM_BUG_ON(ring->vaddr); ring->vaddr = addr; + return 0; -unpin_ring: +err_ring: i915_vma_unpin(vma); -unpin_timeline: +err_timeline: i915_timeline_unpin(ring->timeline); +err_unpin: + atomic_dec(&ring->pin_count); return ret; } @@ -1207,16 +1212,19 @@ void intel_ring_reset(struct intel_ring *ring, u32 tail) void intel_ring_unpin(struct intel_ring *ring) { - GEM_BUG_ON(!ring->vma); - GEM_BUG_ON(!ring->vaddr); + if (!atomic_dec_and_test(&ring->pin_count)) + return; /* Discard any unused bytes beyond that submitted to hw. */ intel_ring_reset(ring, ring->tail); + GEM_BUG_ON(!ring->vma); if (i915_vma_is_map_and_fenceable(ring->vma)) i915_vma_unpin_iomap(ring->vma); else i915_gem_object_unpin_map(ring->vma->obj); + + GEM_BUG_ON(!ring->vaddr); ring->vaddr = NULL; ring->vma->obj->pin_global--; @@ -2081,10 +2089,11 @@ static void ring_destroy(struct intel_engine_cs *engine) WARN_ON(INTEL_GEN(dev_priv) > 2 && (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); + intel_engine_cleanup_common(engine); + intel_ring_unpin(engine->buffer); intel_ring_put(engine->buffer); - intel_engine_cleanup_common(engine); kfree(engine); } diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 086801b51441..486c6953dcb1 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -66,6 +66,7 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) ring->base.effective_size = sz; ring->base.vaddr = (void *)(ring + 1); ring->base.timeline = &ring->timeline; + atomic_set(&ring->base.pin_count, 1); INIT_LIST_HEAD(&ring->base.request_list); intel_ring_update_space(&ring->base); From 248f883db61283b4f5a1c92a5e27277377b09f16 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 25 Jun 2019 10:06:55 +0100 Subject: [PATCH 076/277] drm/i915: Disable SAMPLER_STATE prefetching on all Gen11 steppings. The Demand Prefetch workaround (binding table prefetching) only applies to Icelake A0/B0. But the Sampler Prefetch workaround needs to be applied to all Gen11 steppings, according to a programming note in the SARCHKMD documentation. Using the Intel Gallium driver, I have seen intermittent failures in the dEQP-GLES31.functional.copy_image.non_compressed.* tests. After applying this workaround, the tests reliably pass. v2: Remove the overlap with a pre-production w/a BSpec: 9663 Signed-off-by: Kenneth Graunke Signed-off-by: Chris Wilson Cc: stable@vger.kernel.org Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190625090655.19220-1-chris@chris-wilson.co.uk (cherry picked from commit f9a393875d3af13cc3267477746608dadb7f17c1) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 15e90fd2cfdc..50c0060509a6 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1258,8 +1258,12 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) wa_write_or(wal, GEN7_SARCHKMD, - GEN7_DISABLE_DEMAND_PREFETCH | - GEN7_DISABLE_SAMPLER_PREFETCH); + GEN7_DISABLE_DEMAND_PREFETCH); + + /* Wa_1606682166:icl */ + wa_write_or(wal, + GEN7_SARCHKMD, + GEN7_DISABLE_SAMPLER_PREFETCH); } if (IS_GEN_RANGE(i915, 9, 11)) { From 95eef14cdad150fed43147bcd4f29eea3d0a3f03 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 10 Jun 2019 11:19:14 +0300 Subject: [PATCH 077/277] drm/i915/perf: fix ICL perf register offsets We got the wrong offsets (could they have changed?). New values were computed off an error state by looking up the register offset in the context image as written by the HW. Signed-off-by: Lionel Landwerlin Fixes: 1de401c08fa805 ("drm/i915/perf: enable perf support on ICL") Cc: # v4.18+ Acked-by: Kenneth Graunke Link: https://patchwork.freedesktop.org/patch/msgid/20190610081914.25428-1-lionel.g.landwerlin@intel.com (cherry picked from commit 8dcfdfb4501012a8d36d2157dc73925715f2befb) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_perf.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index a700c5c3d167..1ae06a1b6749 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -3477,9 +3477,13 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set; dev_priv->perf.oa.ops.disable_metric_set = gen10_disable_metric_set; - dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; - dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; - + if (IS_GEN(dev_priv, 10)) { + dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; + dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; + } else { + dev_priv->perf.oa.ctx_oactxctrl_offset = 0x124; + dev_priv->perf.oa.ctx_flexeu0_offset = 0x78e; + } dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); } } From 7d3cd66261665da491d0ee582beabe23df60f983 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 19 Jun 2019 20:08:39 +0300 Subject: [PATCH 078/277] drm/i915: Fix various tracepoints for gen2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gen2 doesn't have a frame counter and apparently we no longer provide a fake .get_vblank_counter() hook for it. That means all tracepoints calling that hook will oops. Update the tracepoints to use intel_crtc_get_vblank_counter() which will gracefully fall back to using the software counter. This is actually a better approach since we now get (hopefully accurate) frame numbers in the traces. This also gets rid of the raw driver->get_vblank_counter() calls, which we need to do in order to switch to the per-crtc vblank vfuncs. v2: Deal with new tracepoints v3: Use a distinct variable name for the internal crtc iterator (Chris) Cc: Shawn Guo Cc: Daniel Vetter Fixes: 967dd4841787 ("drm: remove drm_vblank_no_hw_counter assignment from driver code") Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190619170842.20579-2-ville.syrjala@linux.intel.com (cherry picked from commit 4c888e7bd26f58deb27c2e6ddc90000b89ee9393) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 4 +- drivers/gpu/drm/i915/i915_trace.h | 76 +++++++++----------- 2 files changed, 35 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 30b97ded6fdd..592b92782fab 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -1839,7 +1839,7 @@ static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state) /* FIXME: assert CPU port conditions for SNB+ */ } - trace_intel_pipe_enable(dev_priv, pipe); + trace_intel_pipe_enable(crtc); reg = PIPECONF(cpu_transcoder); val = I915_READ(reg); @@ -1880,7 +1880,7 @@ static void intel_disable_pipe(const struct intel_crtc_state *old_crtc_state) */ assert_planes_disabled(crtc); - trace_intel_pipe_disable(dev_priv, pipe); + trace_intel_pipe_disable(crtc); reg = PIPECONF(cpu_transcoder); val = I915_READ(reg); diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index f4ce643b3bc3..cce426b23a24 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -21,24 +21,22 @@ /* watermark/fifo updates */ TRACE_EVENT(intel_pipe_enable, - TP_PROTO(struct drm_i915_private *dev_priv, enum pipe pipe), - TP_ARGS(dev_priv, pipe), + TP_PROTO(struct intel_crtc *crtc), + TP_ARGS(crtc), TP_STRUCT__entry( __array(u32, frame, 3) __array(u32, scanline, 3) __field(enum pipe, pipe) ), - TP_fast_assign( - enum pipe _pipe; - for_each_pipe(dev_priv, _pipe) { - __entry->frame[_pipe] = - dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, _pipe); - __entry->scanline[_pipe] = - intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, _pipe)); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_crtc *it__; + for_each_intel_crtc(&dev_priv->drm, it__) { + __entry->frame[it__->pipe] = intel_crtc_get_vblank_counter(it__); + __entry->scanline[it__->pipe] = intel_get_crtc_scanline(it__); } - __entry->pipe = pipe; + __entry->pipe = crtc->pipe; ), TP_printk("pipe %c enable, pipe A: frame=%u, scanline=%u, pipe B: frame=%u, scanline=%u, pipe C: frame=%u, scanline=%u", @@ -49,8 +47,8 @@ TRACE_EVENT(intel_pipe_enable, ); TRACE_EVENT(intel_pipe_disable, - TP_PROTO(struct drm_i915_private *dev_priv, enum pipe pipe), - TP_ARGS(dev_priv, pipe), + TP_PROTO(struct intel_crtc *crtc), + TP_ARGS(crtc), TP_STRUCT__entry( __array(u32, frame, 3) @@ -59,14 +57,13 @@ TRACE_EVENT(intel_pipe_disable, ), TP_fast_assign( - enum pipe _pipe; - for_each_pipe(dev_priv, _pipe) { - __entry->frame[_pipe] = - dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, _pipe); - __entry->scanline[_pipe] = - intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, _pipe)); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_crtc *it__; + for_each_intel_crtc(&dev_priv->drm, it__) { + __entry->frame[it__->pipe] = intel_crtc_get_vblank_counter(it__); + __entry->scanline[it__->pipe] = intel_get_crtc_scanline(it__); } - __entry->pipe = pipe; + __entry->pipe = crtc->pipe; ), TP_printk("pipe %c disable, pipe A: frame=%u, scanline=%u, pipe B: frame=%u, scanline=%u, pipe C: frame=%u, scanline=%u", @@ -89,8 +86,7 @@ TRACE_EVENT(intel_pipe_crc, TP_fast_assign( __entry->pipe = crtc->pipe; - __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, - crtc->pipe); + __entry->frame = intel_crtc_get_vblank_counter(crtc); __entry->scanline = intel_get_crtc_scanline(crtc); memcpy(__entry->crcs, crcs, sizeof(__entry->crcs)); ), @@ -112,9 +108,10 @@ TRACE_EVENT(intel_cpu_fifo_underrun, ), TP_fast_assign( + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); __entry->pipe = pipe; - __entry->frame = dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, pipe); - __entry->scanline = intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, pipe)); + __entry->frame = intel_crtc_get_vblank_counter(crtc); + __entry->scanline = intel_get_crtc_scanline(crtc); ), TP_printk("pipe %c, frame=%u, scanline=%u", @@ -134,9 +131,10 @@ TRACE_EVENT(intel_pch_fifo_underrun, TP_fast_assign( enum pipe pipe = pch_transcoder; + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); __entry->pipe = pipe; - __entry->frame = dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, pipe); - __entry->scanline = intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, pipe)); + __entry->frame = intel_crtc_get_vblank_counter(crtc); + __entry->scanline = intel_get_crtc_scanline(crtc); ), TP_printk("pch transcoder %c, frame=%u, scanline=%u", @@ -156,12 +154,10 @@ TRACE_EVENT(intel_memory_cxsr, ), TP_fast_assign( - enum pipe pipe; - for_each_pipe(dev_priv, pipe) { - __entry->frame[pipe] = - dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, pipe); - __entry->scanline[pipe] = - intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, pipe)); + struct intel_crtc *crtc; + for_each_intel_crtc(&dev_priv->drm, crtc) { + __entry->frame[crtc->pipe] = intel_crtc_get_vblank_counter(crtc); + __entry->scanline[crtc->pipe] = intel_get_crtc_scanline(crtc); } __entry->old = old; __entry->new = new; @@ -198,8 +194,7 @@ TRACE_EVENT(g4x_wm, TP_fast_assign( __entry->pipe = crtc->pipe; - __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, - crtc->pipe); + __entry->frame = intel_crtc_get_vblank_counter(crtc); __entry->scanline = intel_get_crtc_scanline(crtc); __entry->primary = wm->pipe[crtc->pipe].plane[PLANE_PRIMARY]; __entry->sprite = wm->pipe[crtc->pipe].plane[PLANE_SPRITE0]; @@ -243,8 +238,7 @@ TRACE_EVENT(vlv_wm, TP_fast_assign( __entry->pipe = crtc->pipe; - __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, - crtc->pipe); + __entry->frame = intel_crtc_get_vblank_counter(crtc); __entry->scanline = intel_get_crtc_scanline(crtc); __entry->level = wm->level; __entry->cxsr = wm->cxsr; @@ -278,8 +272,7 @@ TRACE_EVENT(vlv_fifo_size, TP_fast_assign( __entry->pipe = crtc->pipe; - __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, - crtc->pipe); + __entry->frame = intel_crtc_get_vblank_counter(crtc); __entry->scanline = intel_get_crtc_scanline(crtc); __entry->sprite0_start = sprite0_start; __entry->sprite1_start = sprite1_start; @@ -310,8 +303,7 @@ TRACE_EVENT(intel_update_plane, TP_fast_assign( __entry->pipe = crtc->pipe; __entry->name = plane->name; - __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, - crtc->pipe); + __entry->frame = intel_crtc_get_vblank_counter(crtc); __entry->scanline = intel_get_crtc_scanline(crtc); memcpy(__entry->src, &plane->state->src, sizeof(__entry->src)); memcpy(__entry->dst, &plane->state->dst, sizeof(__entry->dst)); @@ -338,8 +330,7 @@ TRACE_EVENT(intel_disable_plane, TP_fast_assign( __entry->pipe = crtc->pipe; __entry->name = plane->name; - __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, - crtc->pipe); + __entry->frame = intel_crtc_get_vblank_counter(crtc); __entry->scanline = intel_get_crtc_scanline(crtc); ), @@ -364,8 +355,7 @@ TRACE_EVENT(i915_pipe_update_start, TP_fast_assign( __entry->pipe = crtc->pipe; - __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, - crtc->pipe); + __entry->frame = intel_crtc_get_vblank_counter(crtc); __entry->scanline = intel_get_crtc_scanline(crtc); __entry->min = crtc->debug.min_vbl; __entry->max = crtc->debug.max_vbl; From b830f94f7303a49d509d5b1bb34ecb2e648b23c4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 26 Jul 2019 12:49:00 -0300 Subject: [PATCH 079/277] tools headers UAPI: Update tools's copy of mman.h headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To pick up the changes from: 8aa3c927ec10 ("mm/mmap: move common defines to mman-common.h") 22fcea6f85f2 ("mm: move MAP_SYNC to asm-generic/mman-common.h") 0bf5f9492389 ("mm: fix the MAP_UNINITIALIZED flag") To address the following perf build warnings: Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/mman-common.h' differs from latest version at 'include/uapi/asm-generic/mman-common.h' diff -u tools/include/uapi/asm-generic/mman-common.h include/uapi/asm-generic/mman-common.h Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/mman.h' differs from latest version at 'include/uapi/asm-generic/mman.h' diff -u tools/include/uapi/asm-generic/mman.h include/uapi/asm-generic/mman.h That ends up just moving a bit the auto-generated code->string tables: $ tools/perf/trace/beauty/mmap_flags.sh > before $ cp include/uapi/asm-generic/mman.h tools/include/uapi/asm-generic/mman.h $ cp include/uapi/asm-generic/mman-common.h tools/include/uapi/asm-generic/mman-common.h $ tools/perf/trace/beauty/mmap_flags.sh > after $ diff -u before after --- before 2019-07-26 12:45:02.948335904 -0300 +++ after 2019-07-26 12:48:05.342893539 -0300 @@ -4,15 +4,15 @@ [ilog2(0x02) + 1] = "PRIVATE", [ilog2(0x10) + 1] = "FIXED", [ilog2(0x20) + 1] = "ANONYMOUS", + [ilog2(0x008000) + 1] = "POPULATE", + [ilog2(0x010000) + 1] = "NONBLOCK", + [ilog2(0x020000) + 1] = "STACK", + [ilog2(0x040000) + 1] = "HUGETLB", + [ilog2(0x080000) + 1] = "SYNC", [ilog2(0x100000) + 1] = "FIXED_NOREPLACE", [ilog2(0x0100) + 1] = "GROWSDOWN", [ilog2(0x0800) + 1] = "DENYWRITE", [ilog2(0x1000) + 1] = "EXECUTABLE", [ilog2(0x2000) + 1] = "LOCKED", [ilog2(0x4000) + 1] = "NORESERVE", - [ilog2(0x8000) + 1] = "POPULATE", - [ilog2(0x10000) + 1] = "NONBLOCK", - [ilog2(0x20000) + 1] = "STACK", - [ilog2(0x40000) + 1] = "HUGETLB", - [ilog2(0x80000) + 1] = "SYNC", }; $ Cc: Adrian Hunter Cc: Aneesh Kumar K.V Cc: Christoph Hellwig Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-fzqvzni9megaurmsp0k4vy27@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/powerpc/include/uapi/asm/mman.h | 4 ---- tools/arch/sparc/include/uapi/asm/mman.h | 4 ---- tools/include/uapi/asm-generic/mman-common.h | 15 +++++++++------ tools/include/uapi/asm-generic/mman.h | 10 ++++------ 4 files changed, 13 insertions(+), 20 deletions(-) diff --git a/tools/arch/powerpc/include/uapi/asm/mman.h b/tools/arch/powerpc/include/uapi/asm/mman.h index f33105bc5ca6..8601d824a9c6 100644 --- a/tools/arch/powerpc/include/uapi/asm/mman.h +++ b/tools/arch/powerpc/include/uapi/asm/mman.h @@ -4,12 +4,8 @@ #define MAP_DENYWRITE 0x0800 #define MAP_EXECUTABLE 0x1000 #define MAP_GROWSDOWN 0x0100 -#define MAP_HUGETLB 0x40000 #define MAP_LOCKED 0x80 -#define MAP_NONBLOCK 0x10000 #define MAP_NORESERVE 0x40 -#define MAP_POPULATE 0x8000 -#define MAP_STACK 0x20000 #include /* MAP_32BIT is undefined on powerpc, fix it for perf */ #define MAP_32BIT 0 diff --git a/tools/arch/sparc/include/uapi/asm/mman.h b/tools/arch/sparc/include/uapi/asm/mman.h index 38920eed8cbf..7b94dccc843d 100644 --- a/tools/arch/sparc/include/uapi/asm/mman.h +++ b/tools/arch/sparc/include/uapi/asm/mman.h @@ -4,12 +4,8 @@ #define MAP_DENYWRITE 0x0800 #define MAP_EXECUTABLE 0x1000 #define MAP_GROWSDOWN 0x0200 -#define MAP_HUGETLB 0x40000 #define MAP_LOCKED 0x100 -#define MAP_NONBLOCK 0x10000 #define MAP_NORESERVE 0x40 -#define MAP_POPULATE 0x8000 -#define MAP_STACK 0x20000 #include /* MAP_32BIT is undefined on sparc, fix it for perf */ #define MAP_32BIT 0 diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index abd238d0f7a4..63b1f506ea67 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -19,15 +19,18 @@ #define MAP_TYPE 0x0f /* Mask for type of mapping */ #define MAP_FIXED 0x10 /* Interpret addr exactly */ #define MAP_ANONYMOUS 0x20 /* don't use a file */ -#ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED -# define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be uninitialized */ -#else -# define MAP_UNINITIALIZED 0x0 /* Don't support this flag */ -#endif -/* 0x0100 - 0x80000 flags are defined in asm-generic/mman.h */ +/* 0x0100 - 0x4000 flags are defined in asm-generic/mman.h */ +#define MAP_POPULATE 0x008000 /* populate (prefault) pagetables */ +#define MAP_NONBLOCK 0x010000 /* do not block on IO */ +#define MAP_STACK 0x020000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x040000 /* create a huge page mapping */ +#define MAP_SYNC 0x080000 /* perform synchronous page faults for the mapping */ #define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ +#define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be + * uninitialized */ + /* * Flags for mlock */ diff --git a/tools/include/uapi/asm-generic/mman.h b/tools/include/uapi/asm-generic/mman.h index 36c197fc44a0..406f7718f9ad 100644 --- a/tools/include/uapi/asm-generic/mman.h +++ b/tools/include/uapi/asm-generic/mman.h @@ -9,13 +9,11 @@ #define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ #define MAP_LOCKED 0x2000 /* pages are locked */ #define MAP_NORESERVE 0x4000 /* don't check for reservations */ -#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ -#define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ -#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ -#define MAP_SYNC 0x80000 /* perform synchronous page faults for the mapping */ -/* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */ +/* + * Bits [26:31] are reserved, see asm-generic/hugetlb_encode.h + * for MAP_HUGETLB usage + */ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ From 95dc663aa6382fec92674e748682cefeeb2bfc22 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 26 Jul 2019 15:00:24 -0300 Subject: [PATCH 080/277] tools headers UAPI: Update tools's copy of drm.h headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Picking the changes from: c5d3e39caa45 ("drm/i915: Engine discovery query") a88b6e4cbafd ("drm/i915: Allow specification of parallel execbuf") ee1136908e9b ("drm/i915/execlists: Virtual engine bonding") 6d06779e8672 ("drm/i915: Load balancing across a virtual engine") b81dde719439 ("drm/i915: Allow userspace to clone contexts on creation") 8319f44c0525 ("drm/i915: Re-expose SINGLE_TIMELINE flags for context creation") e620f7b3a263 ("drm/i915: Extend I915_CONTEXT_PARAM_SSEU to support local ctx->engine[]") 976b55f0e1db ("drm/i915: Allow a context to define its set of engines") 7f3f317a66ca ("drm/i915: Restore control over ppgtt for context creation ABI") 75b3f1cb50bd ("drm: Fix drm.h uapi header for GNU/kFreeBSD") Silencing these perf build warnings: Warning: Kernel ABI header at 'tools/include/uapi/drm/drm.h' differs from latest version at 'include/uapi/drm/drm.h' diff -u tools/include/uapi/drm/drm.h include/uapi/drm/drm.h Warning: Kernel ABI header at 'tools/include/uapi/drm/i915_drm.h' differs from latest version at 'include/uapi/drm/i915_drm.h' diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h Now 'perf trace' and other code that might use the tools/perf/trace/beauty autogenerated tables will be able to translate this new ioctl code into a string: $ tools/perf/trace/beauty/drm_ioctl.sh > before $ cp include/uapi/drm/i915_drm.h tools/include/uapi/drm/i915_drm.h $ tools/perf/trace/beauty/drm_ioctl.sh > after $ diff -u before after --- before 2019-07-26 13:02:22.052723640 -0300 +++ after 2019-07-26 13:02:35.354906036 -0300 @@ -163,4 +163,6 @@ [DRM_COMMAND_BASE + 0x37] = "I915_PERF_ADD_CONFIG", [DRM_COMMAND_BASE + 0x38] = "I915_PERF_REMOVE_CONFIG", [DRM_COMMAND_BASE + 0x39] = "I915_QUERY", + [DRM_COMMAND_BASE + 0x3a] = "I915_GEM_VM_CREATE", + [DRM_COMMAND_BASE + 0x3b] = "I915_GEM_VM_DESTROY", }; $ Cc: Adrian Hunter Cc: Chris Wilson Cc: Eric Anholt Cc: James Clarke Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Cc: Tvrtko Ursulin Link: https://lkml.kernel.org/n/tip-a9173whgu3h1vo24jgdg5do8@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/drm.h | 1 + tools/include/uapi/drm/i915_drm.h | 209 +++++++++++++++++++++++++++++- 2 files changed, 207 insertions(+), 3 deletions(-) diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 661d73f9a919..8a5b2f8f8eb9 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -50,6 +50,7 @@ typedef unsigned int drm_handle_t; #else /* One of the BSDs */ +#include #include #include typedef int8_t __s8; diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 3a73f5316766..328d05e77d9f 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -136,6 +136,8 @@ enum drm_i915_gem_engine_class { struct i915_engine_class_instance { __u16 engine_class; /* see enum drm_i915_gem_engine_class */ __u16 engine_instance; +#define I915_ENGINE_CLASS_INVALID_NONE -1 +#define I915_ENGINE_CLASS_INVALID_VIRTUAL -2 }; /** @@ -355,6 +357,8 @@ typedef struct _drm_i915_sarea { #define DRM_I915_PERF_ADD_CONFIG 0x37 #define DRM_I915_PERF_REMOVE_CONFIG 0x38 #define DRM_I915_QUERY 0x39 +#define DRM_I915_GEM_VM_CREATE 0x3a +#define DRM_I915_GEM_VM_DESTROY 0x3b /* Must be kept compact -- no holes */ #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) @@ -415,6 +419,8 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config) #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64) #define DRM_IOCTL_I915_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query) +#define DRM_IOCTL_I915_GEM_VM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control) +#define DRM_IOCTL_I915_GEM_VM_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control) /* Allow drivers to submit batchbuffers directly to hardware, relying * on the security mechanisms provided by hardware. @@ -598,6 +604,12 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_MMAP_GTT_COHERENT 52 +/* + * Query whether DRM_I915_GEM_EXECBUFFER2 supports coordination of parallel + * execution through use of explicit fence support. + * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT. + */ +#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53 /* Must be kept compact -- no holes and well documented */ typedef struct drm_i915_getparam { @@ -1120,7 +1132,16 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_FENCE_ARRAY (1<<19) -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1)) +/* + * Setting I915_EXEC_FENCE_SUBMIT implies that lower_32_bits(rsvd2) represent + * a sync_file fd to wait upon (in a nonblocking manner) prior to executing + * the batch. + * + * Returns -EINVAL if the sync_file fd cannot be found. + */ +#define I915_EXEC_FENCE_SUBMIT (1 << 20) + +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1)) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ @@ -1464,8 +1485,9 @@ struct drm_i915_gem_context_create_ext { __u32 ctx_id; /* output: id of new context*/ __u32 flags; #define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS (1u << 0) +#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1u << 1) #define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \ - (-(I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS << 1)) + (-(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE << 1)) __u64 extensions; }; @@ -1507,6 +1529,41 @@ struct drm_i915_gem_context_param { * On creation, all new contexts are marked as recoverable. */ #define I915_CONTEXT_PARAM_RECOVERABLE 0x8 + + /* + * The id of the associated virtual memory address space (ppGTT) of + * this context. Can be retrieved and passed to another context + * (on the same fd) for both to use the same ppGTT and so share + * address layouts, and avoid reloading the page tables on context + * switches between themselves. + * + * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY. + */ +#define I915_CONTEXT_PARAM_VM 0x9 + +/* + * I915_CONTEXT_PARAM_ENGINES: + * + * Bind this context to operate on this subset of available engines. Henceforth, + * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as + * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0] + * and upwards. Slots 0...N are filled in using the specified (class, instance). + * Use + * engine_class: I915_ENGINE_CLASS_INVALID, + * engine_instance: I915_ENGINE_CLASS_INVALID_NONE + * to specify a gap in the array that can be filled in later, e.g. by a + * virtual engine used for load balancing. + * + * Setting the number of engines bound to the context to 0, by passing a zero + * sized argument, will revert back to default settings. + * + * See struct i915_context_param_engines. + * + * Extensions: + * i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE) + * i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND) + */ +#define I915_CONTEXT_PARAM_ENGINES 0xa /* Must be kept compact -- no holes and well documented */ __u64 value; @@ -1540,9 +1597,10 @@ struct drm_i915_gem_context_param_sseu { struct i915_engine_class_instance engine; /* - * Unused for now. Must be cleared to zero. + * Unknown flags must be cleared to zero. */ __u32 flags; +#define I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX (1u << 0) /* * Mask of slices to enable for the context. Valid values are a subset @@ -1570,12 +1628,115 @@ struct drm_i915_gem_context_param_sseu { __u32 rsvd; }; +/* + * i915_context_engines_load_balance: + * + * Enable load balancing across this set of engines. + * + * Into the I915_EXEC_DEFAULT slot [0], a virtual engine is created that when + * used will proxy the execbuffer request onto one of the set of engines + * in such a way as to distribute the load evenly across the set. + * + * The set of engines must be compatible (e.g. the same HW class) as they + * will share the same logical GPU context and ring. + * + * To intermix rendering with the virtual engine and direct rendering onto + * the backing engines (bypassing the load balancing proxy), the context must + * be defined to use a single timeline for all engines. + */ +struct i915_context_engines_load_balance { + struct i915_user_extension base; + + __u16 engine_index; + __u16 num_siblings; + __u32 flags; /* all undefined flags must be zero */ + + __u64 mbz64; /* reserved for future use; must be zero */ + + struct i915_engine_class_instance engines[0]; +} __attribute__((packed)); + +#define I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(name__, N__) struct { \ + struct i915_user_extension base; \ + __u16 engine_index; \ + __u16 num_siblings; \ + __u32 flags; \ + __u64 mbz64; \ + struct i915_engine_class_instance engines[N__]; \ +} __attribute__((packed)) name__ + +/* + * i915_context_engines_bond: + * + * Constructed bonded pairs for execution within a virtual engine. + * + * All engines are equal, but some are more equal than others. Given + * the distribution of resources in the HW, it may be preferable to run + * a request on a given subset of engines in parallel to a request on a + * specific engine. We enable this selection of engines within a virtual + * engine by specifying bonding pairs, for any given master engine we will + * only execute on one of the corresponding siblings within the virtual engine. + * + * To execute a request in parallel on the master engine and a sibling requires + * coordination with a I915_EXEC_FENCE_SUBMIT. + */ +struct i915_context_engines_bond { + struct i915_user_extension base; + + struct i915_engine_class_instance master; + + __u16 virtual_index; /* index of virtual engine in ctx->engines[] */ + __u16 num_bonds; + + __u64 flags; /* all undefined flags must be zero */ + __u64 mbz64[4]; /* reserved for future use; must be zero */ + + struct i915_engine_class_instance engines[0]; +} __attribute__((packed)); + +#define I915_DEFINE_CONTEXT_ENGINES_BOND(name__, N__) struct { \ + struct i915_user_extension base; \ + struct i915_engine_class_instance master; \ + __u16 virtual_index; \ + __u16 num_bonds; \ + __u64 flags; \ + __u64 mbz64[4]; \ + struct i915_engine_class_instance engines[N__]; \ +} __attribute__((packed)) name__ + +struct i915_context_param_engines { + __u64 extensions; /* linked chain of extension blocks, 0 terminates */ +#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */ +#define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */ + struct i915_engine_class_instance engines[0]; +} __attribute__((packed)); + +#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \ + __u64 extensions; \ + struct i915_engine_class_instance engines[N__]; \ +} __attribute__((packed)) name__ + struct drm_i915_gem_context_create_ext_setparam { #define I915_CONTEXT_CREATE_EXT_SETPARAM 0 struct i915_user_extension base; struct drm_i915_gem_context_param param; }; +struct drm_i915_gem_context_create_ext_clone { +#define I915_CONTEXT_CREATE_EXT_CLONE 1 + struct i915_user_extension base; + __u32 clone_id; + __u32 flags; +#define I915_CONTEXT_CLONE_ENGINES (1u << 0) +#define I915_CONTEXT_CLONE_FLAGS (1u << 1) +#define I915_CONTEXT_CLONE_SCHEDATTR (1u << 2) +#define I915_CONTEXT_CLONE_SSEU (1u << 3) +#define I915_CONTEXT_CLONE_TIMELINE (1u << 4) +#define I915_CONTEXT_CLONE_VM (1u << 5) +#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1) + __u64 rsvd; +}; + struct drm_i915_gem_context_destroy { __u32 ctx_id; __u32 pad; @@ -1821,6 +1982,7 @@ struct drm_i915_perf_oa_config { struct drm_i915_query_item { __u64 query_id; #define DRM_I915_QUERY_TOPOLOGY_INFO 1 +#define DRM_I915_QUERY_ENGINE_INFO 2 /* Must be kept compact -- no holes and well documented */ /* @@ -1919,6 +2081,47 @@ struct drm_i915_query_topology_info { __u8 data[]; }; +/** + * struct drm_i915_engine_info + * + * Describes one engine and it's capabilities as known to the driver. + */ +struct drm_i915_engine_info { + /** Engine class and instance. */ + struct i915_engine_class_instance engine; + + /** Reserved field. */ + __u32 rsvd0; + + /** Engine flags. */ + __u64 flags; + + /** Capabilities of this engine. */ + __u64 capabilities; +#define I915_VIDEO_CLASS_CAPABILITY_HEVC (1 << 0) +#define I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC (1 << 1) + + /** Reserved fields. */ + __u64 rsvd1[4]; +}; + +/** + * struct drm_i915_query_engine_info + * + * Engine info query enumerates all engines known to the driver by filling in + * an array of struct drm_i915_engine_info structures. + */ +struct drm_i915_query_engine_info { + /** Number of struct drm_i915_engine_info structs following. */ + __u32 num_engines; + + /** MBZ */ + __u32 rsvd[3]; + + /** Marker for drm_i915_engine_info structures. */ + struct drm_i915_engine_info engines[]; +}; + #if defined(__cplusplus) } #endif From 7ee526152db7a75d7b8713346dac76ffc3662b29 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 26 Jul 2019 15:29:56 -0300 Subject: [PATCH 081/277] tools perf beauty: Fix usbdevfs_ioctl table generator to handle _IOC() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In addition to _IOW() and _IOR(), to handle this case: #define USBDEVFS_CONNINFO_EX(len) _IOC(_IOC_READ, 'U', 32, len) That will happen in the next sync of this header file. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-3br5e4t64e4lp0goo84che3s@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/usbdevfs_ioctl.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/perf/trace/beauty/usbdevfs_ioctl.sh b/tools/perf/trace/beauty/usbdevfs_ioctl.sh index 930b80f422e8..aa597ae53747 100755 --- a/tools/perf/trace/beauty/usbdevfs_ioctl.sh +++ b/tools/perf/trace/beauty/usbdevfs_ioctl.sh @@ -3,10 +3,13 @@ [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ +# also as: +# #define USBDEVFS_CONNINFO_EX(len) _IOC(_IOC_READ, 'U', 32, len) + printf "static const char *usbdevfs_ioctl_cmds[] = {\n" -regex="^#[[:space:]]*define[[:space:]]+USBDEVFS_(\w+)[[:space:]]+_IO[WR]{0,2}\([[:space:]]*'U'[[:space:]]*,[[:space:]]*([[:digit:]]+).*" -egrep $regex ${header_dir}/usbdevice_fs.h | egrep -v 'USBDEVFS_\w+32[[:space:]]' | \ - sed -r "s/$regex/\2 \1/g" | \ +regex="^#[[:space:]]*define[[:space:]]+USBDEVFS_(\w+)(\(\w+\))?[[:space:]]+_IO[CWR]{0,2}\([[:space:]]*(_IOC_\w+,[[:space:]]*)?'U'[[:space:]]*,[[:space:]]*([[:digit:]]+).*" +egrep "$regex" ${header_dir}/usbdevice_fs.h | egrep -v 'USBDEVFS_\w+32[[:space:]]' | \ + sed -r "s/$regex/\4 \1/g" | \ sort | xargs printf "\t[%s] = \"%s\",\n" printf "};\n\n" printf "#if 0\n" From 0f58163c9d5702efbc242d144fd038e54b4c6ad0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 26 Jul 2019 15:31:25 -0300 Subject: [PATCH 082/277] tools headers UAPI: Sync usbdevice_fs.h with the kernels to get new ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To get the changes in: 6d101f24f1dd ("USB: add usbfs ioctl to retrieve the connection parameters") And address this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/usbdevice_fs.h' differs from latest version at 'include/uapi/linux/usbdevice_fs.h' diff -u tools/include/uapi/linux/usbdevice_fs.h include/uapi/linux/usbdevice_fs.h Which ends up autogenerating a ioctl_cmd->string table used by 'perf trace': $ tools/perf/trace/beauty/usbdevfs_ioctl.sh > before $ cp include/uapi/linux/usbdevice_fs.h tools/include/uapi/linux/usbdevice_fs.h $ tools/perf/trace/beauty/usbdevfs_ioctl.sh > after $ diff -u before after --- before 2019-07-26 15:26:55.513636844 -0300 +++ after 2019-07-26 15:29:11.650518677 -0300 @@ -23,6 +23,7 @@ [2] = "BULK", [30] = "DROP_PRIVILEGES", [31] = "GET_SPEED", + [32] = "CONNINFO_EX", [3] = "RESETEP", [4] = "SETINTERFACE", [5] = "SETCONFIGURATION", $ Now 'perf trace' ioctl beautifier will translate this new ioctl to a string and at some point will allow filtering the 'ioctl' syscall with something like this in a system wide strace-like sessin: # perf trace -e ioctl/cmd=USBDEVFS_CONNINFO_EX/ Cc: Adrian Hunter Cc: Dmitry Torokhov Cc: Greg Kroah-Hartman Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-tkdfbgzqypwco96b309c0ovd@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/usbdevice_fs.h | 26 +++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tools/include/uapi/linux/usbdevice_fs.h b/tools/include/uapi/linux/usbdevice_fs.h index 964e87217be4..78efe870c2b7 100644 --- a/tools/include/uapi/linux/usbdevice_fs.h +++ b/tools/include/uapi/linux/usbdevice_fs.h @@ -76,6 +76,26 @@ struct usbdevfs_connectinfo { unsigned char slow; }; +struct usbdevfs_conninfo_ex { + __u32 size; /* Size of the structure from the kernel's */ + /* point of view. Can be used by userspace */ + /* to determine how much data can be */ + /* used/trusted. */ + __u32 busnum; /* USB bus number, as enumerated by the */ + /* kernel, the device is connected to. */ + __u32 devnum; /* Device address on the bus. */ + __u32 speed; /* USB_SPEED_* constants from ch9.h */ + __u8 num_ports; /* Number of ports the device is connected */ + /* to on the way to the root hub. It may */ + /* be bigger than size of 'ports' array so */ + /* userspace can detect overflows. */ + __u8 ports[7]; /* List of ports on the way from the root */ + /* hub to the device. Current limit in */ + /* USB specification is 7 tiers (root hub, */ + /* 5 intermediate hubs, device), which */ + /* gives at most 6 port entries. */ +}; + #define USBDEVFS_URB_SHORT_NOT_OK 0x01 #define USBDEVFS_URB_ISO_ASAP 0x02 #define USBDEVFS_URB_BULK_CONTINUATION 0x04 @@ -137,6 +157,7 @@ struct usbdevfs_hub_portinfo { #define USBDEVFS_CAP_REAP_AFTER_DISCONNECT 0x10 #define USBDEVFS_CAP_MMAP 0x20 #define USBDEVFS_CAP_DROP_PRIVILEGES 0x40 +#define USBDEVFS_CAP_CONNINFO_EX 0x80 /* USBDEVFS_DISCONNECT_CLAIM flags & struct */ @@ -197,5 +218,10 @@ struct usbdevfs_streams { #define USBDEVFS_FREE_STREAMS _IOR('U', 29, struct usbdevfs_streams) #define USBDEVFS_DROP_PRIVILEGES _IOW('U', 30, __u32) #define USBDEVFS_GET_SPEED _IO('U', 31) +/* + * Returns struct usbdevfs_conninfo_ex; length is variable to allow + * extending size of the data returned. + */ +#define USBDEVFS_CONNINFO_EX(len) _IOC(_IOC_READ, 'U', 32, len) #endif /* _UAPI_LINUX_USBDEVICE_FS_H */ From c093de6bd3c50d3dd597ff9fa5cf7a30acbb3eb7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 26 Jul 2019 15:41:09 -0300 Subject: [PATCH 083/277] tools headers UAPI: Sync sched.h with the kernel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To get the changes in: a509a7cd7974 ("sched/uclamp: Extend sched_setattr() to support utilization clamping") 1d6362fa0cfc ("sched/core: Allow sched_setattr() to use the current policy") 7f192e3cd316 ("fork: add clone3") And silence this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/sched.h' differs from latest version at 'include/uapi/linux/sched.h' diff -u tools/include/uapi/linux/sched.h include/uapi/linux/sched.h No changes in tools/ due to the above. Cc: Adrian Hunter Cc: Christian Brauner Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Cc: Patrick Bellasi Link: https://lkml.kernel.org/n/tip-mtrpsjrux5hgyr5uf8l1aa46@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/sched.h | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/sched.h b/tools/include/uapi/linux/sched.h index ed4ee170bee2..b3105ac1381a 100644 --- a/tools/include/uapi/linux/sched.h +++ b/tools/include/uapi/linux/sched.h @@ -2,6 +2,8 @@ #ifndef _UAPI_LINUX_SCHED_H #define _UAPI_LINUX_SCHED_H +#include + /* * cloning flags: */ @@ -31,6 +33,20 @@ #define CLONE_NEWNET 0x40000000 /* New network namespace */ #define CLONE_IO 0x80000000 /* Clone io context */ +/* + * Arguments for the clone3 syscall + */ +struct clone_args { + __aligned_u64 flags; + __aligned_u64 pidfd; + __aligned_u64 child_tid; + __aligned_u64 parent_tid; + __aligned_u64 exit_signal; + __aligned_u64 stack; + __aligned_u64 stack_size; + __aligned_u64 tls; +}; + /* * Scheduling policies */ @@ -51,9 +67,21 @@ #define SCHED_FLAG_RESET_ON_FORK 0x01 #define SCHED_FLAG_RECLAIM 0x02 #define SCHED_FLAG_DL_OVERRUN 0x04 +#define SCHED_FLAG_KEEP_POLICY 0x08 +#define SCHED_FLAG_KEEP_PARAMS 0x10 +#define SCHED_FLAG_UTIL_CLAMP_MIN 0x20 +#define SCHED_FLAG_UTIL_CLAMP_MAX 0x40 + +#define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \ + SCHED_FLAG_KEEP_PARAMS) + +#define SCHED_FLAG_UTIL_CLAMP (SCHED_FLAG_UTIL_CLAMP_MIN | \ + SCHED_FLAG_UTIL_CLAMP_MAX) #define SCHED_FLAG_ALL (SCHED_FLAG_RESET_ON_FORK | \ SCHED_FLAG_RECLAIM | \ - SCHED_FLAG_DL_OVERRUN) + SCHED_FLAG_DL_OVERRUN | \ + SCHED_FLAG_KEEP_ALL | \ + SCHED_FLAG_UTIL_CLAMP) #endif /* _UAPI_LINUX_SCHED_H */ From e54599c93dbf487ef80ba2833c5760c22bd20c32 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 26 Jul 2019 15:44:41 -0300 Subject: [PATCH 084/277] tools headers UAPI: Sync if_link.h with the kernel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To pick the changes in: 07a4ddec3ce9 ("bonding: add an option to specify a delay between peer notifications") And silence this build warning: Kernel ABI header at 'tools/include/uapi/linux/if_link.h' differs from latest version at 'include/uapi/linux/if_link.h' Cc: Adrian Hunter Cc: David S. Miller Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Cc: Vincent Bernat Link: https://lkml.kernel.org/n/tip-3liw4exxh8goc0rq9xryl2kv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/if_link.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 7d113a9602f0..4a8c02cafa9a 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -695,6 +695,7 @@ enum { IFLA_VF_IB_NODE_GUID, /* VF Infiniband node GUID */ IFLA_VF_IB_PORT_GUID, /* VF Infiniband port GUID */ IFLA_VF_VLAN_LIST, /* nested list of vlans, option for QinQ */ + IFLA_VF_BROADCAST, /* VF broadcast */ __IFLA_VF_MAX, }; @@ -705,6 +706,10 @@ struct ifla_vf_mac { __u8 mac[32]; /* MAX_ADDR_LEN */ }; +struct ifla_vf_broadcast { + __u8 broadcast[32]; +}; + struct ifla_vf_vlan { __u32 vf; __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */ From 7622236ceb167aa3857395f9bdaf871442aa467e Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Tue, 23 Jul 2019 11:06:01 -0400 Subject: [PATCH 085/277] perf header: Fix divide by zero error if f_header.attr_size==0 So I have been having lots of trouble with hand-crafted perf.data files causing segfaults and the like, so I have started fuzzing the perf tool. First issue found: If f_header.attr_size is 0 in the perf.data file, then perf will crash with a divide-by-zero error. Committer note: Added a pr_err() to tell the user why the command failed. Signed-off-by: Vince Weaver Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/alpine.DEB.2.21.1907231100440.14532@macbook-air Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 20111f8da5cb..47877f0f6667 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3559,6 +3559,13 @@ int perf_session__read_header(struct perf_session *session) data->file.path); } + if (f_header.attr_size == 0) { + pr_err("ERROR: The %s file's attr size field is 0 which is unexpected.\n" + "Was the 'perf record' command properly terminated?\n", + data->file.path); + return -EINVAL; + } + nr_attrs = f_header.attrs.size / f_header.attr_size; lseek(fd, f_header.attrs.offset, SEEK_SET); From 20f9781f491360e7459c589705a2e4b1f136bee9 Mon Sep 17 00:00:00 2001 From: Numfor Mbiziwo-Tiapo Date: Wed, 24 Jul 2019 16:44:58 -0700 Subject: [PATCH 086/277] perf header: Fix use of unitialized value warning When building our local version of perf with MSAN (Memory Sanitizer) and running the perf record command, MSAN throws a use of uninitialized value warning in "tools/perf/util/util.c:333:6". This warning stems from the "buf" variable being passed into "write". It originated as the variable "ev" with the type union perf_event* defined in the "perf_event__synthesize_attr" function in "tools/perf/util/header.c". In the "perf_event__synthesize_attr" function they allocate space with a malloc call using ev, then go on to only assign some of the member variables before passing "ev" on as a parameter to the "process" function therefore "ev" contains uninitialized memory. Changing the malloc call to zalloc to initialize all the members of "ev" which gets rid of the warning. To reproduce this warning, build perf by running: make -C tools/perf CLANG=1 CC=clang EXTRA_CFLAGS="-fsanitize=memory\ -fsanitize-memory-track-origins" (Additionally, llvm might have to be installed and clang might have to be specified as the compiler - export CC=/usr/bin/clang) then running: tools/perf/perf record -o - ls / | tools/perf/perf --no-pager annotate\ -i - --stdio Please see the cover letter for why false positive warnings may be generated. Signed-off-by: Numfor Mbiziwo-Tiapo Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Drayton Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20190724234500.253358-2-nums@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 47877f0f6667..1903d7ec9797 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3646,7 +3646,7 @@ int perf_event__synthesize_attr(struct perf_tool *tool, size += sizeof(struct perf_event_header); size += ids * sizeof(u64); - ev = malloc(size); + ev = zalloc(size); if (ev == NULL) return -ENOMEM; From 2e9a06dda10aea81a17c623f08534dac6735434a Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Thu, 25 Jul 2019 11:57:43 -0400 Subject: [PATCH 087/277] perf tools: Fix perf.data documentation units for memory size The perf.data-file-format documentation incorrectly says the HEADER_TOTAL_MEM results are in bytes. The results are in kilobytes (perf reads the value from /proc/meminfo) Signed-off-by: Vince Weaver Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/alpine.DEB.2.21.1907251155500.22624@macbook-air Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf.data-file-format.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index 5f54feb19977..d030c87ed9f5 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -126,7 +126,7 @@ vendor,family,model,stepping. For example: GenuineIntel,6,69,1 HEADER_TOTAL_MEM = 10, -An uint64_t with the total memory in bytes. +An uint64_t with the total memory in kilobytes. HEADER_CMDLINE = 11, From 705d0abbcc7adf04743d04ea8754acbcdf21c326 Mon Sep 17 00:00:00 2001 From: Santosh Sivaraj Date: Mon, 29 Jul 2019 11:25:36 +0530 Subject: [PATCH 088/277] powerpc/kvm: Fall through switch case explicitly Implicit fallthrough warning was enabled globally which broke the build. Make it explicit with a `fall through` comment. Signed-off-by: Santosh Sivaraj Reviewed-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190729055536.25591-1-santosh@fossix.org --- arch/powerpc/kvm/book3s_32_mmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 653936177857..18f244aad7aa 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -239,6 +239,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr, case 2: case 6: pte->may_write = true; + /* fall through */ case 3: case 5: case 7: From c270cac40828eca4fb8d7c27cab1d0ac7765ff3d Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Sat, 29 Jun 2019 14:13:50 +0100 Subject: [PATCH 089/277] drm/i915: fix whitelist selftests with readonly registers When a register is readonly there is not much we can tell about its value (apart from its default value?). This can be covered by tests exercising the value of the register from userspace. For PS_INVOCATION_COUNT we've got the following piglit tests : KHR-GL45.pipeline_statistics_query_tests_ARB.functional_fragment_shader_invocations Vulkan CTS tests : dEQP-VK.query_pool.statistics_query.fragment_shader_invocations.* v2: Use a local to shrink under 80cols. Signed-off-by: Lionel Landwerlin Fixes: 86554f48e511 ("drm/i915/selftests: Verify whitelist of context registers") Tested-by: Anuj Phogat Signed-off-by: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190629131350.31185-1-chris@chris-wilson.co.uk (cherry picked from commit 361b69051326ed0e07553315227678d00d651a9e) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/selftest_workarounds.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 9eaf030affd0..44becd9538be 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -925,7 +925,12 @@ check_whitelisted_registers(struct intel_engine_cs *engine, err = 0; for (i = 0; i < engine->whitelist.count; i++) { - if (!fn(engine, a[i], b[i], engine->whitelist.list[i].reg)) + const struct i915_wa *wa = &engine->whitelist.list[i]; + + if (i915_mmio_reg_offset(wa->reg) & RING_FORCE_TO_NONPRIV_RD) + continue; + + if (!fn(engine, a[i], b[i], wa->reg)) err = -EINVAL; } From 6ce5bfe936ac31d5c52c4b1328d0bfda5f97e7ca Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 28 Jun 2019 15:07:19 +0300 Subject: [PATCH 090/277] drm/i915: whitelist PS_(DEPTH|INVOCATION)_COUNT CFL:C0+ changed the status of those registers which are now blacklisted by default. This is breaking a number of CTS tests on GL & Vulkan : KHR-GL45.pipeline_statistics_query_tests_ARB.functional_fragment_shader_invocations (GL) dEQP-VK.query_pool.statistics_query.fragment_shader_invocations.* (Vulkan) v2: Only use one whitelist entry (Lionel) Bspec: 14091 Signed-off-by: Lionel Landwerlin Cc: stable@vger.kernel.org # 6883eab27481: drm/i915: Support flags in whitlist WAs Cc: stable@vger.kernel.org Acked-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190628120720.21682-3-lionel.g.landwerlin@intel.com (cherry picked from commit 2c903da50f5a9522b134e488bd0f92646c46f3c0) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 50c0060509a6..b26c3549429e 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1098,10 +1098,25 @@ static void glk_whitelist_build(struct intel_engine_cs *engine) static void cfl_whitelist_build(struct intel_engine_cs *engine) { + struct i915_wa_list *w = &engine->whitelist; + if (engine->class != RENDER_CLASS) return; - gen9_whitelist_build(&engine->whitelist); + gen9_whitelist_build(w); + + /* + * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml + * + * This covers 4 register which are next to one another : + * - PS_INVOCATION_COUNT + * - PS_INVOCATION_COUNT_UDW + * - PS_DEPTH_COUNT + * - PS_DEPTH_COUNT_UDW + */ + whitelist_reg_ext(w, PS_INVOCATION_COUNT, + RING_FORCE_TO_NONPRIV_RD | + RING_FORCE_TO_NONPRIV_RANGE_4); } static void cnl_whitelist_build(struct intel_engine_cs *engine) From cf8f9aa1eda7d916bd23f6b8c226404deb11690c Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 28 Jun 2019 15:07:20 +0300 Subject: [PATCH 091/277] drm/i915/icl: whitelist PS_(DEPTH|INVOCATION)_COUNT The same tests failing on CFL+ platforms are also failing on ICL. Documentation doesn't list the WaAllowPMDepthAndInvocationCountAccessFromUMD workaround for ICL but applying it fixes the same tests as CFL. v2: Use only one whitelist entry (Lionel) Signed-off-by: Lionel Landwerlin Tested-by: Anuj Phogat Cc: stable@vger.kernel.org # 6883eab27481: drm/i915: Support flags in whitlist WAs Cc: stable@vger.kernel.org Acked-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190628120720.21682-4-lionel.g.landwerlin@intel.com (cherry picked from commit 3fe0107e45ab396342497e06b8924cdd485cde3b) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index b26c3549429e..98dfb086320f 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1144,6 +1144,19 @@ static void icl_whitelist_build(struct intel_engine_cs *engine) /* WaEnableStateCacheRedirectToCS:icl */ whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1); + + /* + * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl + * + * This covers 4 register which are next to one another : + * - PS_INVOCATION_COUNT + * - PS_INVOCATION_COUNT_UDW + * - PS_DEPTH_COUNT + * - PS_DEPTH_COUNT_UDW + */ + whitelist_reg_ext(w, PS_INVOCATION_COUNT, + RING_FORCE_TO_NONPRIV_RD | + RING_FORCE_TO_NONPRIV_RANGE_4); break; case VIDEO_DECODE_CLASS: From fdcc789a4a0bb2ef01857095752be12b03cbb341 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Mon, 1 Jul 2019 13:44:42 +0300 Subject: [PATCH 092/277] drm/i915: Fix memleak in runtime wakeref tracking If we untrack wakerefs, the actual count may reach zero. However the krealloced owners array is still there and needs to be taken care of. Free the owners unconditionally to fix the leak. Fixes: bd780f37a361 ("drm/i915: Track all held rpm wakerefs") Reported-by: Juha-Pekka Heikkila Cc: Juha-Pekka Heikkila Cc: Chris Wilson Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190701104442.9319-1-mika.kuoppala@linux.intel.com (cherry picked from commit c5f846eed2a1856b78e988eeef08215c70598ecd) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_runtime_pm.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 502c54428570..8d1aebc3e857 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -221,13 +221,11 @@ __untrack_all_wakerefs(struct intel_runtime_pm_debug *debug, static void dump_and_free_wakeref_tracking(struct intel_runtime_pm_debug *debug) { - struct drm_printer p; + if (debug->count) { + struct drm_printer p = drm_debug_printer("i915"); - if (!debug->count) - return; - - p = drm_debug_printer("i915"); - __print_intel_runtime_pm_wakeref(&p, debug); + __print_intel_runtime_pm_wakeref(&p, debug); + } kfree(debug->owners); } From d1b739f326b960631827f0ea350002c5bc8df443 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 6 Jun 2019 15:42:10 +0300 Subject: [PATCH 093/277] drm/i915: Deal with machines that expose less than three QGV points MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When SAGV is forced to disabled/min/med/max in the BIOS pcode will only hand us a single QGV point instead of the normal three. Fix the code to deal with that instead declaring the bandwidth limit to be 0 MB/s (and thus preventing any planes from being enabled). Also shrink the max_bw sturct a bit while at it, and change the deratedbw type to unsigned since the code returns the bw as an unsigned int. Since we now keep track of how many qgv points we got from pcode we can drop the earlier check added for the "pcode doesn't support the memory subsystem query" case. Cc: felix.j.degrood@intel.com Cc: Mark Janes Cc: Matt Roper Cc: Clint Taylor Fixes: c457d9cf256e ("drm/i915: Make sure we have enough memory bandwidth on ICL") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110838 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190606124210.3482-1-ville.syrjala@linux.intel.com Reviewed-by: Matt Roper (cherry picked from commit 56e9371bc3f3e7d6c1a197a45d550b2ce6af25f6) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_bw.c | 15 ++++++++++----- drivers/gpu/drm/i915/i915_drv.h | 5 +++-- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 753ac3165061..7b908e10d32e 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -178,6 +178,8 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv) clpchgroup = (sa->deburst * deinterleave / num_channels) << i; bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1; + bi->num_qgv_points = qi.num_points; + for (j = 0; j < qi.num_points; j++) { const struct intel_qgv_point *sp = &qi.points[j]; int ct, bw; @@ -195,7 +197,7 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv) bi->deratedbw[j] = min(maxdebw, bw * 9 / 10); /* 90% */ - DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%d\n", + DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%u\n", i, j, bi->num_planes, bi->deratedbw[j]); } @@ -211,14 +213,17 @@ static unsigned int icl_max_bw(struct drm_i915_private *dev_priv, { int i; - /* Did we initialize the bw limits successfully? */ - if (dev_priv->max_bw[0].num_planes == 0) - return UINT_MAX; - for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) { const struct intel_bw_info *bi = &dev_priv->max_bw[i]; + /* + * Pcode will not expose all QGV points when + * SAGV is forced to off/min/med/max. + */ + if (qgv_point >= bi->num_qgv_points) + return UINT_MAX; + if (num_planes >= bi->num_planes) return bi->deratedbw[qgv_point]; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bc909ec5d9c3..fe7a6ec2c199 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1674,8 +1674,9 @@ struct drm_i915_private { } dram_info; struct intel_bw_info { - int num_planes; - int deratedbw[3]; + unsigned int deratedbw[3]; /* for each QGV point */ + u8 num_qgv_points; + u8 num_planes; } max_bw[6]; struct drm_private_obj bw_obj; From f691eaa4801484fffc8a2bcb24caa27fb2edcce3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 3 Jul 2019 18:19:12 +0100 Subject: [PATCH 094/277] drm/i915/gtt: Defer the free for alloc error paths If we hit an error while allocating the page tables, we have to unwind the incomplete updates, and wish to free the unused pd. However, we are not allowed to be hoding the spinlock at that point, and so must use the later free to defer it until after we drop the lock. <3> [414.363795] BUG: sleeping function called from invalid context at drivers/gpu/drm/i915/i915_gem_gtt.c:472 <3> [414.364167] in_atomic(): 1, irqs_disabled(): 0, pid: 3905, name: i915_selftest <4> [414.364406] 3 locks held by i915_selftest/3905: <4> [414.364408] #0: 0000000034fe8aa8 (&dev->mutex){....}, at: device_driver_attach+0x18/0x50 <4> [414.364415] #1: 000000006bd8a560 (&dev->struct_mutex){+.+.}, at: igt_ctx_exec+0xb7/0x410 [i915] <4> [414.364476] #2: 000000003dfdc766 (&(&pd->lock)->rlock){+.+.}, at: gen8_ppgtt_alloc_pdp+0x448/0x540 [i915] <3> [414.364529] Preemption disabled at: <4> [414.364530] [<0000000000000000>] 0x0 <4> [414.364696] CPU: 0 PID: 3905 Comm: i915_selftest Tainted: G U 5.2.0-rc7-CI-CI_DRM_6403+ #1 <4> [414.364698] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.1-0-g8891697-prebuilt.qemu-project.org 04/01/2014 <4> [414.364699] Call Trace: <4> [414.364704] dump_stack+0x67/0x9b <4> [414.364708] ___might_sleep+0x167/0x250 <4> [414.364777] vm_free_page+0x24/0xc0 [i915] <4> [414.364852] free_pd+0xf/0x20 [i915] <4> [414.364897] gen8_ppgtt_alloc_pdp+0x489/0x540 [i915] <4> [414.364946] gen8_ppgtt_alloc_4lvl+0x8e/0x2e0 [i915] <4> [414.364992] ppgtt_bind_vma+0x2e/0x60 [i915] <4> [414.365039] i915_vma_bind+0xe8/0x2c0 [i915] <4> [414.365088] __i915_vma_do_pin+0xa1/0xd20 [i915] Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111050 Fixes: 1d1b5490b91c ("drm/i915/gtt: Replace struct_mutex serialisation for allocation") Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190703171913.16585-3-chris@chris-wilson.co.uk (cherry picked from commit 068610895ebd4bd86f496f01eb7b97e56d7269b2) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_gtt.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 8ab820145ea6..50fe72d40d8b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1446,7 +1446,8 @@ unwind_pd: gen8_ppgtt_set_pdpe(pdp, vm->scratch_pd, pdpe); GEM_BUG_ON(!atomic_read(&pdp->used)); atomic_dec(&pdp->used); - free_pd(vm, pd); + GEM_BUG_ON(alloc); + alloc = pd; /* defer the free to after the lock */ } spin_unlock(&pdp->lock); unwind: @@ -1515,7 +1516,8 @@ unwind_pdp: spin_lock(&pml4->lock); if (atomic_dec_and_test(&pdp->used)) { gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); - free_pd(vm, pdp); + GEM_BUG_ON(alloc); + alloc = pdp; /* defer the free until after the lock */ } spin_unlock(&pml4->lock); unwind: From 5f4c82c89ff0e11b31561aa7e547acb10bf650c2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Jul 2019 21:16:56 +0100 Subject: [PATCH 095/277] drm/i915/gtt: Mark the freed page table entries with scratch On unwinding the allocation error path and having freed the page table entry, it is imperative that we mark it as scratch. <4> [416.075569] general protection fault: 0000 [#1] PREEMPT SMP PTI <4> [416.075801] CPU: 0 PID: 2385 Comm: kworker/u2:11 Tainted: G U 5.2.0-rc7-CI-Patchwork_13534+ #1 <4> [416.076162] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.1-0-g8891697-prebuilt.qemu-project.org 04/01/2014 <4> [416.076522] Workqueue: i915 __i915_vm_release [i915] <4> [416.076754] RIP: 0010:gen8_ppgtt_cleanup_3lvl+0x58/0xb0 [i915] <4> [416.077023] Code: 81 e2 04 fe ff ff 81 c2 ff 01 00 00 4c 8d 74 d6 58 4d 8b 65 00 4d 3b a7 28 02 00 00 74 40 49 8d 5c 24 50 49 81 c4 50 10 00 00 <48> 8b 2b 49 3b af 20 02 00 00 74 13 4c 89 ff 48 89 ee e8 01 fb ff <4> [416.077445] RSP: 0018:ffffc9000046bd98 EFLAGS: 00010206 <4> [416.077625] RAX: 0001000000000000 RBX: 6b6b6b6b6b6b6bbb RCX: 8b4b56d500000000 <4> [416.077838] RDX: 00000000000001ff RSI: ffff88805a578008 RDI: ffff88805bd0efc8 <4> [416.078167] RBP: ffff88805bd0efc8 R08: 0000000004e42b93 R09: 0000000000000001 <4> [416.078381] R10: 0000000000000000 R11: ffff888077a1b0b8 R12: 6b6b6b6b6b6b7bbb <4> [416.078594] R13: ffff88805a578058 R14: ffff88805a579058 R15: ffff88805bd0efc8 <4> [416.078815] FS: 0000000000000000(0000) GS:ffff88807da00000(0000) knlGS:0000000000000000 <4> [416.079395] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4> [416.079851] CR2: 000056160fec2b14 CR3: 0000000071bbc003 CR4: 00000000003606f0 <4> [416.080388] Call Trace: <4> [416.080828] gen8_ppgtt_cleanup+0x64/0x100 [i915] <4> [416.081399] __i915_vm_release+0xfc/0x1d0 [i915] Fixes: 1d1b5490b91c ("drm/i915/gtt: Replace struct_mutex serialisation for allocation") Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Mika Kuoppala Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190704201656.15775-1-chris@chris-wilson.co.uk (cherry picked from commit e7539b79f703a6b533385088fc15cb5c9ab3f56f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 50fe72d40d8b..7015a97b1097 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1444,6 +1444,7 @@ unwind_pd: spin_lock(&pdp->lock); if (atomic_dec_and_test(&pd->used)) { gen8_ppgtt_set_pdpe(pdp, vm->scratch_pd, pdpe); + pdp->entry[pdpe] = vm->scratch_pd; GEM_BUG_ON(!atomic_read(&pdp->used)); atomic_dec(&pdp->used); GEM_BUG_ON(alloc); @@ -1516,6 +1517,7 @@ unwind_pdp: spin_lock(&pml4->lock); if (atomic_dec_and_test(&pdp->used)) { gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); + pml4->entry[pml4e] = vm->scratch_pdp; GEM_BUG_ON(alloc); alloc = pdp; /* defer the free until after the lock */ } From aa56a292ce623734ddd30f52d73f527d1f3529b5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 8 Jul 2019 15:03:27 +0100 Subject: [PATCH 096/277] drm/i915/userptr: Acquire the page lock around set_page_dirty() set_page_dirty says: For pages with a mapping this should be done under the page lock for the benefit of asynchronous memory errors who prefer a consistent dirty state. This rule can be broken in some special cases, but should be better not to. Under those rules, it is only safe for us to use the plain set_page_dirty calls for shmemfs/anonymous memory. Userptr may be used with real mappings and so needs to use the locked version (set_page_dirty_lock). Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=203317 Fixes: 5cc9ed4b9a7a ("drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl") References: 6dcc693bc57f ("ext4: warn when page is dirtied without buffers") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: stable@vger.kernel.org Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190708140327.26825-1-chris@chris-wilson.co.uk (cherry picked from commit cb6d7c7dc7ff8cace666ddec66334117a6068ce2) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 528b61678334..2caa594322bc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -664,7 +664,15 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, for_each_sgt_page(page, sgt_iter, pages) { if (obj->mm.dirty) - set_page_dirty(page); + /* + * As this may not be anonymous memory (e.g. shmem) + * but exist on a real mapping, we have to lock + * the page in order to dirty it -- holding + * the page reference is not sufficient to + * prevent the inode from being truncated. + * Play safe and take the lock. + */ + set_page_dirty_lock(page); mark_page_accessed(page); put_page(page); From 06c12ae3b401238477e65e8c4e04e065699a6115 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 9 Jul 2019 15:33:39 +0300 Subject: [PATCH 097/277] drm/i915/perf: ensure we keep a reference on the driver The i915 perf stream has its own file descriptor and is tied to reference of the driver. We haven't taken care of keep the driver alive. Signed-off-by: Lionel Landwerlin Suggested-by: Chris Wilson Fixes: eec688e1420da5 ("drm/i915: Add i915 perf infrastructure") Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190709123351.5645-2-lionel.g.landwerlin@intel.com (cherry picked from commit a5af1df716c123a09341351008fc497bea137b77) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_perf.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 1ae06a1b6749..629511ea9a18 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2515,6 +2515,9 @@ static int i915_perf_release(struct inode *inode, struct file *file) i915_perf_destroy_locked(stream); mutex_unlock(&dev_priv->perf.lock); + /* Release the reference the perf stream kept on the driver. */ + drm_dev_put(&dev_priv->drm); + return 0; } @@ -2650,6 +2653,11 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, if (!(param->flags & I915_PERF_FLAG_DISABLED)) i915_perf_enable_locked(stream); + /* Take a reference on the driver that will be kept with stream_fd + * until its release. + */ + drm_dev_get(&dev_priv->drm); + return stream_fd; err_open: From 8f48de49795ca52f70c96558ccc6a0c174504779 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Wed, 10 Jul 2019 11:55:24 +0100 Subject: [PATCH 098/277] drm/i915/perf: add missing delay for OA muxes configuration This was dropped from the original patch series, we weren't sure whether it was needed at the time. More recent tests show it's definitely needed to have acurate performance data. Signed-off-by: Lionel Landwerlin Fixes: 19f81df2859eb1 ("drm/i915/perf: Add OA unit support for Gen 8+") Acked-by: Chris Wilson [ickle: combine duplicate code and comments] Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190710105524.23017-1-chris@chris-wilson.co.uk (cherry picked from commit 14bfcd3e0daeb0f757a02aac85fd03e0933ab37e) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_perf.c | 49 ++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 629511ea9a18..5140017f9a39 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1567,28 +1567,10 @@ static void config_oa_regs(struct drm_i915_private *dev_priv, } } -static int hsw_enable_metric_set(struct i915_perf_stream *stream) +static void delay_after_mux(void) { - struct drm_i915_private *dev_priv = stream->dev_priv; - const struct i915_oa_config *oa_config = stream->oa_config; - - /* PRM: - * - * OA unit is using “crclk” for its functionality. When trunk - * level clock gating takes place, OA clock would be gated, - * unable to count the events from non-render clock domain. - * Render clock gating must be disabled when OA is enabled to - * count the events from non-render domain. Unit level clock - * gating for RCS should also be disabled. - */ - I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & - ~GEN7_DOP_CLOCK_GATE_ENABLE)); - I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) | - GEN6_CSUNIT_CLOCK_GATE_DISABLE)); - - config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); - - /* It apparently takes a fairly long time for a new MUX + /* + * It apparently takes a fairly long time for a new MUX * configuration to be be applied after these register writes. * This delay duration was derived empirically based on the * render_basic config but hopefully it covers the maximum @@ -1610,6 +1592,30 @@ static int hsw_enable_metric_set(struct i915_perf_stream *stream) * a delay at this location would mitigate any invalid reports. */ usleep_range(15000, 20000); +} + +static int hsw_enable_metric_set(struct i915_perf_stream *stream) +{ + struct drm_i915_private *dev_priv = stream->dev_priv; + const struct i915_oa_config *oa_config = stream->oa_config; + + /* + * PRM: + * + * OA unit is using “crclk” for its functionality. When trunk + * level clock gating takes place, OA clock would be gated, + * unable to count the events from non-render clock domain. + * Render clock gating must be disabled when OA is enabled to + * count the events from non-render domain. Unit level clock + * gating for RCS should also be disabled. + */ + I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & + ~GEN7_DOP_CLOCK_GATE_ENABLE)); + I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) | + GEN6_CSUNIT_CLOCK_GATE_DISABLE)); + + config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); + delay_after_mux(); config_oa_regs(dev_priv, oa_config->b_counter_regs, oa_config->b_counter_regs_len); @@ -1835,6 +1841,7 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream) return ret; config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); + delay_after_mux(); config_oa_regs(dev_priv, oa_config->b_counter_regs, oa_config->b_counter_regs_len); From 982b1d002f16c2695871e005c4132060c836db56 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 15 Jul 2019 09:09:28 +0100 Subject: [PATCH 099/277] drm/i915: Lock the engine while dumping the active request We cannot let the request be retired and freed while we are trying to dump it during error capture. It is not sufficient just to grab a reference to the request, as during retirement we may free the ring which we are also dumping. So take the engine lock to prevent retiring and freeing of the request. Reported-by: Alex Shumsky Fixes: 83c317832eb1 ("drm/i915: Dump the ringbuffer of the active request for debugging") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Alex Shumsky Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190715080946.15593-6-chris@chris-wilson.co.uk (cherry picked from commit cfe7288c276e359eebf057699fe86c2f8af14224) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 11 ++++------- drivers/gpu/drm/i915/i915_gpu_error.c | 6 ++++-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 7fd33e81c2d9..aa5a1f11a91b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1471,6 +1471,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct i915_gpu_error * const error = &engine->i915->gpu_error; struct i915_request *rq; intel_wakeref_t wakeref; + unsigned long flags; if (header) { va_list ap; @@ -1490,10 +1491,9 @@ void intel_engine_dump(struct intel_engine_cs *engine, i915_reset_engine_count(error, engine), i915_reset_count(error)); - rcu_read_lock(); - drm_printf(m, "\tRequests:\n"); + spin_lock_irqsave(&engine->active.lock, flags); rq = intel_engine_find_active_request(engine); if (rq) { print_request(m, rq, "\t\tactive "); @@ -1513,8 +1513,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, print_request_ring(m, rq); } - - rcu_read_unlock(); + spin_unlock_irqrestore(&engine->active.lock, flags); wakeref = intel_runtime_pm_get_if_in_use(&engine->i915->runtime_pm); if (wakeref) { @@ -1672,7 +1671,6 @@ struct i915_request * intel_engine_find_active_request(struct intel_engine_cs *engine) { struct i915_request *request, *active = NULL; - unsigned long flags; /* * We are called by the error capture, reset and to dump engine @@ -1685,7 +1683,7 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) * At all other times, we must assume the GPU is still running, but * we only care about the snapshot of this moment. */ - spin_lock_irqsave(&engine->active.lock, flags); + lockdep_assert_held(&engine->active.lock); list_for_each_entry(request, &engine->active.requests, sched.link) { if (i915_request_completed(request)) continue; @@ -1700,7 +1698,6 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) active = request; break; } - spin_unlock_irqrestore(&engine->active.lock, flags); return active; } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 41a511d5267f..8bc76fcff70d 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1418,6 +1418,7 @@ static void gem_record_rings(struct i915_gpu_state *error) struct intel_engine_cs *engine = i915->engine[i]; struct drm_i915_error_engine *ee = &error->engine[i]; struct i915_request *request; + unsigned long flags; ee->engine_id = -1; @@ -1429,10 +1430,11 @@ static void gem_record_rings(struct i915_gpu_state *error) error_record_engine_registers(error, engine, ee); error_record_engine_execlists(engine, ee); + spin_lock_irqsave(&engine->active.lock, flags); request = intel_engine_find_active_request(engine); if (request) { struct i915_gem_context *ctx = request->gem_context; - struct intel_ring *ring; + struct intel_ring *ring = request->ring; ee->vm = ctx->vm ?: &ggtt->vm; @@ -1462,7 +1464,6 @@ static void gem_record_rings(struct i915_gpu_state *error) ee->rq_post = request->postfix; ee->rq_tail = request->tail; - ring = request->ring; ee->cpu_ring_head = ring->head; ee->cpu_ring_tail = ring->tail; ee->ringbuffer = @@ -1470,6 +1471,7 @@ static void gem_record_rings(struct i915_gpu_state *error) engine_record_requests(engine, request, ee); } + spin_unlock_irqrestore(&engine->active.lock, flags); ee->hws_page = i915_error_object_create(i915, From a8f196a0fa6391a436f63f360a1fb57031fdf26c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 17 Jul 2019 14:45:36 +0300 Subject: [PATCH 100/277] drm/i915: Make sure cdclk is high enough for DP audio on VLV/CHV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On VLV/CHV there is some kind of linkage between the cdclk frequency and the DP link frequency. The spec says: "For DP audio configuration, cdclk frequency shall be set to meet the following requirements: DP Link Frequency(MHz) | Cdclk frequency(MHz) 270 | 320 or higher 162 | 200 or higher" I suspect that would more accurately be expressed as "cdclk >= DP link clock", and in any case we can express it like that in the code because of the limited set of cdclk (200, 266, 320, 400 MHz) and link frequencies (162 and 270 MHz) we support. Without this we can end up in a situation where the cdclk is too low and enabling DP audio will kill the pipe. Happens eg. with 2560x1440 modes where the 266MHz cdclk is sufficient to pump the pixels (241.5 MHz dotclock) but is too low for the DP audio due to the link frequency being 270 MHz. v2: Spell out the cdclk and link frequencies we actually support Cc: stable@vger.kernel.org Tested-by: Stefan Gottwald Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111149 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190717114536.22937-1-ville.syrjala@linux.intel.com Acked-by: Chris Wilson (cherry picked from commit bffb31f73b29a60ef693842d8744950c2819851d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_cdclk.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 8993ab283562..0d19bbd08122 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2239,6 +2239,17 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) if (crtc_state->has_audio && INTEL_GEN(dev_priv) >= 9) min_cdclk = max(2 * 96000, min_cdclk); + /* + * "For DP audio configuration, cdclk frequency shall be set to + * meet the following requirements: + * DP Link Frequency(MHz) | Cdclk frequency(MHz) + * 270 | 320 or higher + * 162 | 200 or higher" + */ + if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && + intel_crtc_has_dp_encoder(crtc_state) && crtc_state->has_audio) + min_cdclk = max(crtc_state->port_clock, min_cdclk); + /* * On Valleyview some DSI panels lose (v|h)sync when the clock is lower * than 320000KHz. From 6d61f716a01ec0e134de38ae97e71d6fec5a6ff6 Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Wed, 17 Jul 2019 15:34:51 -0700 Subject: [PATCH 101/277] drm/i915/vbt: Fix VBT parsing for the PSR section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A single 32-bit PSR2 training pattern field follows the sixteen element array of PSR table entries in the VBT spec. But, we incorrectly define this PSR2 field for each of the PSR table entries. As a result, the PSR1 training pattern duration for any panel_type != 0 will be parsed incorrectly. Secondly, PSR2 training pattern durations for VBTs with bdb version >= 226 will also be wrong. Cc: Rodrigo Vivi Cc: José Roberto de Souza Cc: stable@vger.kernel.org Cc: stable@vger.kernel.org #v5.2 Fixes: 88a0d9606aff ("drm/i915/vbt: Parse and use the new field with PSR2 TP2/3 wakeup time") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111088 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=204183 Signed-off-by: Dhinakaran Pandiyan Reviewed-by: Ville Syrjälä Reviewed-by: José Roberto de Souza Acked-by: Rodrigo Vivi Tested-by: François Guerraz Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20190717223451.2595-1-dhinakaran.pandiyan@intel.com (cherry picked from commit b5ea9c9337007d6e700280c8a60b4e10d070fb53) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_bios.c | 2 +- drivers/gpu/drm/i915/display/intel_vbt_defs.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index c4710889cb32..3ef4e9f573cf 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -765,7 +765,7 @@ parse_psr(struct drm_i915_private *dev_priv, const struct bdb_header *bdb) } if (bdb->version >= 226) { - u32 wakeup_time = psr_table->psr2_tp2_tp3_wakeup_time; + u32 wakeup_time = psr->psr2_tp2_tp3_wakeup_time; wakeup_time = (wakeup_time >> (2 * panel_type)) & 0x3; switch (wakeup_time) { diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h index 2f4894e9a03d..5ddbe71ab423 100644 --- a/drivers/gpu/drm/i915/display/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h @@ -478,13 +478,13 @@ struct psr_table { /* TP wake up time in multiple of 100 */ u16 tp1_wakeup_time; u16 tp2_tp3_wakeup_time; - - /* PSR2 TP2/TP3 wakeup time for 16 panels */ - u32 psr2_tp2_tp3_wakeup_time; } __packed; struct bdb_psr { struct psr_table psr_table[16]; + + /* PSR2 TP2/TP3 wakeup time for 16 panels */ + u32 psr2_tp2_tp3_wakeup_time; } __packed; /* From 0bbfdce345c8cf01a3a985fa99fefd2146dcc748 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 17 Jul 2019 19:06:19 +0100 Subject: [PATCH 102/277] drm/i915: Fix GEN8_MCR_SELECTOR programming fls returns bit positions starting from one for the lsb and the MCR register expects zero based (sub)slice addressing. Incorrent MCR programming can have the effect of directing MMIO reads of registers in the 0xb100-0xb3ff range to invalid subslice returning zeroes instead of actual content. Signed-off-by: Tvrtko Ursulin Fixes: 1e40d4aea57b ("drm/i915/cnl: Implement WaProgramMgsrForCorrectSliceSpecificMmioReads") Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190717180624.20354-2-tvrtko.ursulin@linux.intel.com (cherry picked from commit 15160879d47213c32f357bc67b6014d9aaf14ed7) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index aa5a1f11a91b..f25632c9b292 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -969,9 +969,14 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type) u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv) { const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; + unsigned int slice = fls(sseu->slice_mask) - 1; + unsigned int subslice; u32 mcr_s_ss_select; - u32 slice = fls(sseu->slice_mask); - u32 subslice = fls(sseu->subslice_mask[slice]); + + GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask)); + subslice = fls(sseu->subslice_mask[slice]); + GEM_BUG_ON(!subslice); + subslice--; if (IS_GEN(dev_priv, 10)) mcr_s_ss_select = GEN8_MCR_SLICE(slice) | From 89f5752307cf53010d97503ac501b2ca1b089922 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 28 Jun 2019 17:36:18 +0300 Subject: [PATCH 103/277] drm/i915: Fix the TBT AUX power well enabling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the mapping from a TBT AUX power well index to the DP_AUX_CH_CTL register. Fixes: c7375d9542f1 ("drm/i915: Configure AUX_CH_CTL when enabling the AUX power domain") Cc: José Roberto de Souza Cc: Rodrigo Vivi Signed-off-by: Imre Deak Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190628143635.22066-7-imre.deak@intel.com (cherry picked from commit 29ae36abf08f943b76a2959f5000c44efa335be7) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_power.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index c93ad512014c..2d1939db108f 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -438,16 +438,23 @@ icl_combo_phy_aux_power_well_disable(struct drm_i915_private *dev_priv, #define ICL_AUX_PW_TO_CH(pw_idx) \ ((pw_idx) - ICL_PW_CTL_IDX_AUX_A + AUX_CH_A) +#define ICL_TBT_AUX_PW_TO_CH(pw_idx) \ + ((pw_idx) - ICL_PW_CTL_IDX_AUX_TBT1 + AUX_CH_C) + static void icl_tc_phy_aux_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - enum aux_ch aux_ch = ICL_AUX_PW_TO_CH(power_well->desc->hsw.idx); + int pw_idx = power_well->desc->hsw.idx; + bool is_tbt = power_well->desc->hsw.is_tc_tbt; + enum aux_ch aux_ch; u32 val; + aux_ch = is_tbt ? ICL_TBT_AUX_PW_TO_CH(pw_idx) : + ICL_AUX_PW_TO_CH(pw_idx); val = I915_READ(DP_AUX_CH_CTL(aux_ch)); val &= ~DP_AUX_CH_CTL_TBT_IO; - if (power_well->desc->hsw.is_tc_tbt) + if (is_tbt) val |= DP_AUX_CH_CTL_TBT_IO; I915_WRITE(DP_AUX_CH_CTL(aux_ch), val); From 8aa259b10a6a759c50137bbbf225df0c17ca5d27 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 18 Jul 2019 10:30:21 -0700 Subject: [PATCH 104/277] libbpf: fix missing __WORDSIZE definition hashmap.h depends on __WORDSIZE being defined. It is defined by glibc/musl in different headers. It's an explicit goal for musl to be "non-detectable" at compilation time, so instead include glibc header if glibc is explicitly detected and fall back to musl header otherwise. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Andrii Nakryiko Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Fixes: e3b924224028 ("libbpf: add resizable non-thread safe internal hashmap") Link: https://lkml.kernel.org/r/20190718173021.2418606-1-andriin@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/bpf/hashmap.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h index 03748a742146..bae8879cdf58 100644 --- a/tools/lib/bpf/hashmap.h +++ b/tools/lib/bpf/hashmap.h @@ -10,6 +10,11 @@ #include #include +#ifdef __GLIBC__ +#include +#else +#include +#endif #include "libbpf_internal.h" static inline size_t hash_bits(size_t h, int bits) From 3884ae44f41247e0ae41952d4fad46db86d2e0a8 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 23 Jul 2019 10:08:28 +0200 Subject: [PATCH 105/277] pidfd: remove obsolete comments from test Since the introduction of CLONE_PIDFD pidfd_send_signal() is independent of CONFIG_PROC_FS. Signed-off-by: Christian Brauner --- tools/testing/selftests/pidfd/pidfd_test.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c index 7eaa8a3de262..b632965e60eb 100644 --- a/tools/testing/selftests/pidfd/pidfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_test.c @@ -339,13 +339,9 @@ static int test_pidfd_send_signal_syscall_support(void) ret = sys_pidfd_send_signal(pidfd, 0, NULL, 0); if (ret < 0) { - /* - * pidfd_send_signal() will currently return ENOSYS when - * CONFIG_PROC_FS is not set. - */ if (errno == ENOSYS) ksft_exit_skip( - "%s test: pidfd_send_signal() syscall not supported (Ensure that CONFIG_PROC_FS=y is set)\n", + "%s test: pidfd_send_signal() syscall not supported\n", test_name); ksft_exit_fail_msg("%s test: Failed to send signal\n", From 1caf7d50f46bd0388e38e653b146aa81700e8eb8 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Wed, 24 Jul 2019 12:48:16 -0400 Subject: [PATCH 106/277] pidfd: Add warning if exit_state is 0 during notification Previously a condition got missed where the pidfd waiters are awakened before the exit_state gets set. This can result in a missed notification [1] and the polling thread waiting forever. It is fixed now, however it would be nice to avoid this kind of issue going unnoticed in the future. So just add a warning to catch it in the future. /* References */ [1]: https://lore.kernel.org/lkml/20190717172100.261204-1-joel@joelfernandes.org/ Signed-off-by: Joel Fernandes (Google) Link: https://lore.kernel.org/r/20190724164816.201099-1-joel@joelfernandes.org Signed-off-by: Christian Brauner --- kernel/signal.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/signal.c b/kernel/signal.c index 91b789dd6e72..349f5a67f100 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1885,6 +1885,7 @@ static void do_notify_pidfd(struct task_struct *task) { struct pid *pid; + WARN_ON(task->exit_state == 0); pid = task_pid(task); wake_up_all(&pid->wait_pidfd); } From f14312a93b34b9350dc33ff0b4215c24f4c82617 Mon Sep 17 00:00:00 2001 From: Enrico Weigelt Date: Thu, 25 Jul 2019 21:06:03 +0200 Subject: [PATCH 107/277] platform/x86: pcengines-apuv2: use KEY_RESTART for front button The keycode KEY_RESTART is more appropriate for the front button, as most people use it for things like restart or factory reset. Signed-off-by: Enrico Weigelt Fixes: f8eb0235f659 ("x86: pcengines apuv2 gpio/leds/keys platform driver") Signed-off-by: Andy Shevchenko --- drivers/platform/x86/pcengines-apuv2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/pcengines-apuv2.c b/drivers/platform/x86/pcengines-apuv2.c index 96b499c6929a..e4c68efac0c2 100644 --- a/drivers/platform/x86/pcengines-apuv2.c +++ b/drivers/platform/x86/pcengines-apuv2.c @@ -93,7 +93,7 @@ static struct gpiod_lookup_table gpios_led_table = { static struct gpio_keys_button apu2_keys_buttons[] = { { - .code = KEY_SETUP, + .code = KEY_RESTART, .active_low = 1, .desc = "front button", .type = EV_KEY, From d655e5b4e1c8ce207f0a1868aa334c4ecdcbddfb Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 23 Jul 2019 10:24:51 +0200 Subject: [PATCH 108/277] s390: clean up qdio.h Fix two typos, document missing fields in the driver initialization data and remove the copy&pasted 'pfmt' field from the qdr struct. Signed-off-by: Julian Wiedmann Reviewed-by: Jens Remus Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/qdio.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index db5ef22c46e4..f647d565bd6d 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -28,7 +28,7 @@ * @sliba: storage list information block address * @sla: storage list address * @slsba: storage list state block address - * @akey: access key for DLIB + * @akey: access key for SLIB * @bkey: access key for SL * @ckey: access key for SBALs * @dkey: access key for SLSB @@ -50,11 +50,10 @@ struct qdesfmt0 { /** * struct qdr - queue description record (QDR) * @qfmt: queue format - * @pfmt: implementation dependent parameter format * @ac: adapter characteristics * @iqdcnt: input queue descriptor count * @oqdcnt: output queue descriptor count - * @iqdsz: inpout queue descriptor size + * @iqdsz: input queue descriptor size * @oqdsz: output queue descriptor size * @qiba: queue information block address * @qkey: queue information block key @@ -62,8 +61,7 @@ struct qdesfmt0 { */ struct qdr { u32 qfmt : 8; - u32 pfmt : 8; - u32 : 8; + u32 : 16; u32 ac : 8; u32 : 8; u32 iqdcnt : 8; @@ -327,6 +325,7 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, * struct qdio_initialize - qdio initialization data * @cdev: associated ccw device * @q_format: queue format + * @qdr_ac: feature flags to set * @adapter_name: name for the adapter * @qib_param_field_format: format for qib_parm_field * @qib_param_field: pointer to 128 bytes or NULL, if no param field @@ -338,6 +337,7 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, * @input_handler: handler to be called for input queues * @output_handler: handler to be called for output queues * @queue_start_poll_array: polling handlers (one per input queue or NULL) + * @scan_threshold: # of in-use buffers that triggers scan on output queue * @int_parm: interruption parameter * @input_sbal_addr_array: address of no_input_qs * 128 pointers * @output_sbal_addr_array: address of no_output_qs * 128 pointers From 3361f3193c747e8b32b9edf60f2567187ed4faef Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 25 Jul 2019 13:52:08 +0200 Subject: [PATCH 109/277] s390: update configs Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/configs/debug_defconfig | 330 ++++++++++++++++----------- arch/s390/configs/defconfig | 321 +++++++++++++++----------- arch/s390/configs/zfcpdump_defconfig | 31 +-- 3 files changed, 410 insertions(+), 272 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index e26d4413d34c..74e78ec5beb6 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -3,6 +3,7 @@ CONFIG_POSIX_MQUEUE=y CONFIG_AUDIT=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_PREEMPT=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASKSTATS=y @@ -18,55 +19,71 @@ CONFIG_BLK_CGROUP=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_RDMA=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_HUGETLB=y CONFIG_CPUSETS=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_BPF=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y +CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set -CONFIG_CHECKPOINT_RESTORE=y CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y +CONFIG_LIVEPATCH=y +CONFIG_TUNE_ZEC12=y +CONFIG_NR_CPUS=512 +CONFIG_NUMA=y +CONFIG_HZ_100=y +CONFIG_KEXEC_FILE=y +CONFIG_EXPOLINE=y +CONFIG_EXPOLINE_AUTO=y +CONFIG_CHSC_SCH=y +CONFIG_VFIO_CCW=m +CONFIG_VFIO_AP=m +CONFIG_CRASH_DUMP=y +CONFIG_HIBERNATION=y +CONFIG_PM_DEBUG=y +CONFIG_CMM=m +CONFIG_APPLDATA_BASE=y +CONFIG_KVM=m +CONFIG_VHOST_NET=m +CONFIG_VHOST_VSOCK=m CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_STATIC_KEYS_SELFTEST=y +CONFIG_REFCOUNT_FULL=y +CONFIG_LOCK_EVENT_COUNTS=y CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_MODULE_SIG=y +CONFIG_MODULE_SIG_SHA256=y CONFIG_BLK_DEV_INTEGRITY=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y -CONFIG_BLK_WBT_SQ=y +CONFIG_BLK_CGROUP_IOLATENCY=y CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y CONFIG_BSD_DISKLABEL=y CONFIG_MINIX_SUBPARTITION=y CONFIG_SOLARIS_X86_PARTITION=y CONFIG_UNIXWARE_DISKLABEL=y -CONFIG_CFQ_GROUP_IOSCHED=y -CONFIG_DEFAULT_DEADLINE=y -CONFIG_LIVEPATCH=y -CONFIG_TUNE_ZEC12=y -CONFIG_NR_CPUS=512 -CONFIG_NUMA=y -CONFIG_PREEMPT=y -CONFIG_HZ_100=y -CONFIG_KEXEC_FILE=y -CONFIG_KEXEC_VERIFY_SIG=y -CONFIG_EXPOLINE=y -CONFIG_EXPOLINE_AUTO=y +CONFIG_IOSCHED_BFQ=y +CONFIG_BFQ_GROUP_IOSCHED=y +CONFIG_BINFMT_MISC=m CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y @@ -82,17 +99,8 @@ CONFIG_ZSMALLOC=m CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y -CONFIG_PCI=y -CONFIG_PCI_DEBUG=y -CONFIG_HOTPLUG_PCI=y -CONFIG_HOTPLUG_PCI_S390=y -CONFIG_CHSC_SCH=y -CONFIG_VFIO_AP=m -CONFIG_VFIO_CCW=m -CONFIG_CRASH_DUMP=y -CONFIG_BINFMT_MISC=m -CONFIG_HIBERNATION=y -CONFIG_PM_DEBUG=y +CONFIG_PERCPU_STATS=y +CONFIG_GUP_BENCHMARK=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m @@ -121,9 +129,6 @@ CONFIG_NET_IPVTI=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_TCP_CONG_ADVANCED=y @@ -139,10 +144,6 @@ CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_MIP6=m -CONFIG_INET6_XFRM_MODE_TRANSPORT=m -CONFIG_INET6_XFRM_MODE_TUNNEL=m -CONFIG_INET6_XFRM_MODE_BEET=m -CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m CONFIG_IPV6_VTI=m CONFIG_IPV6_SIT=m CONFIG_IPV6_GRE=m @@ -264,11 +265,8 @@ CONFIG_IP_VS_SED=m CONFIG_IP_VS_NQ=m CONFIG_IP_VS_FTP=m CONFIG_IP_VS_PE_SIP=m -CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NF_TABLES_IPV4=y -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -287,10 +285,7 @@ CONFIG_IP_NF_SECURITY=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NF_TABLES_IPV6=y -CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -309,7 +304,7 @@ CONFIG_IP6_NF_RAW=m CONFIG_IP6_NF_SECURITY=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_NF_TABLES_BRIDGE=y +CONFIG_NF_TABLES_BRIDGE=m CONFIG_RDS=m CONFIG_RDS_RDMA=m CONFIG_RDS_TCP=m @@ -375,9 +370,11 @@ CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m +CONFIG_PCI=y +CONFIG_PCI_DEBUG=y +CONFIG_HOTPLUG_PCI=y +CONFIG_HOTPLUG_PCI_S390=y CONFIG_DEVTMPFS=y -CONFIG_DMA_CMA=y -CONFIG_CMA_SIZE_MBYTES=0 CONFIG_CONNECTOR=y CONFIG_ZRAM=m CONFIG_BLK_DEV_LOOP=m @@ -395,7 +392,6 @@ CONFIG_RAID_ATTRS=m CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m CONFIG_BLK_DEV_SR=m CONFIG_CHR_DEV_SG=y CONFIG_CHR_DEV_SCH=m @@ -415,17 +411,19 @@ CONFIG_SCSI_DH_RDAC=m CONFIG_SCSI_DH_HP_SW=m CONFIG_SCSI_DH_EMC=m CONFIG_SCSI_DH_ALUA=m -CONFIG_SCSI_OSD_INITIATOR=m -CONFIG_SCSI_OSD_ULD=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y CONFIG_MD_LINEAR=m CONFIG_MD_MULTIPATH=m CONFIG_MD_FAULTY=m +CONFIG_MD_CLUSTER=m +CONFIG_BCACHE=m CONFIG_BLK_DEV_DM=m +CONFIG_DM_UNSTRIPED=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m +CONFIG_DM_WRITECACHE=m CONFIG_DM_MIRROR=m CONFIG_DM_LOG_USERSPACE=m CONFIG_DM_RAID=m @@ -445,23 +443,78 @@ CONFIG_EQUALIZER=m CONFIG_IFB=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m -CONFIG_VXLAN=m CONFIG_TUN=m CONFIG_VETH=m CONFIG_VIRTIO_NET=m CONFIG_NLMON=m +# CONFIG_NET_VENDOR_3COM is not set +# CONFIG_NET_VENDOR_ADAPTEC is not set +# CONFIG_NET_VENDOR_AGERE is not set +# CONFIG_NET_VENDOR_ALACRITECH is not set +# CONFIG_NET_VENDOR_ALTEON is not set +# CONFIG_NET_VENDOR_AMAZON is not set +# CONFIG_NET_VENDOR_AMD is not set +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_ATHEROS is not set +# CONFIG_NET_VENDOR_AURORA is not set +# CONFIG_NET_VENDOR_BROADCOM is not set +# CONFIG_NET_VENDOR_BROCADE is not set +# CONFIG_NET_VENDOR_CADENCE is not set +# CONFIG_NET_VENDOR_CAVIUM is not set # CONFIG_NET_VENDOR_CHELSIO is not set +# CONFIG_NET_VENDOR_CISCO is not set +# CONFIG_NET_VENDOR_CORTINA is not set +# CONFIG_NET_VENDOR_DEC is not set +# CONFIG_NET_VENDOR_DLINK is not set +# CONFIG_NET_VENDOR_EMULEX is not set +# CONFIG_NET_VENDOR_EZCHIP is not set +# CONFIG_NET_VENDOR_GOOGLE is not set +# CONFIG_NET_VENDOR_HP is not set +# CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m CONFIG_MLX5_CORE_EN=y +# CONFIG_MLXFW is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_MICROCHIP is not set +# CONFIG_NET_VENDOR_MICROSEMI is not set +# CONFIG_NET_VENDOR_MYRI is not set # CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_NETERION is not set +# CONFIG_NET_VENDOR_NETRONOME is not set +# CONFIG_NET_VENDOR_NI is not set +# CONFIG_NET_VENDOR_NVIDIA is not set +# CONFIG_NET_VENDOR_OKI is not set +# CONFIG_NET_VENDOR_PACKET_ENGINES is not set +# CONFIG_NET_VENDOR_QLOGIC is not set +# CONFIG_NET_VENDOR_QUALCOMM is not set +# CONFIG_NET_VENDOR_RDC is not set +# CONFIG_NET_VENDOR_REALTEK is not set +# CONFIG_NET_VENDOR_RENESAS is not set +# CONFIG_NET_VENDOR_ROCKER is not set +# CONFIG_NET_VENDOR_SAMSUNG is not set +# CONFIG_NET_VENDOR_SEEQ is not set +# CONFIG_NET_VENDOR_SOLARFLARE is not set +# CONFIG_NET_VENDOR_SILAN is not set +# CONFIG_NET_VENDOR_SIS is not set +# CONFIG_NET_VENDOR_SMSC is not set +# CONFIG_NET_VENDOR_SOCIONEXT is not set +# CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SUN is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set +# CONFIG_NET_VENDOR_TEHUTI is not set +# CONFIG_NET_VENDOR_TI is not set +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m CONFIG_PPP_DEFLATE=m +CONFIG_PPP_FILTER=y CONFIG_PPP_MPPE=m +CONFIG_PPP_MULTILINK=y CONFIG_PPPOE=m CONFIG_PPTP=m CONFIG_PPPOL2TP=m @@ -473,10 +526,13 @@ CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set CONFIG_LEGACY_PTY_COUNT=0 +CONFIG_NULL_TTY=m CONFIG_HW_RANDOM_VIRTIO=m CONFIG_RAW_DRIVER=m CONFIG_HANGCHECK_TIMER=m CONFIG_TN3270_FS=y +CONFIG_PPS=m +# CONFIG_PTP_1588_CLOCK is not set # CONFIG_HWMON is not set CONFIG_WATCHDOG=y CONFIG_WATCHDOG_NOWAYOUT=y @@ -498,8 +554,8 @@ CONFIG_VFIO_MDEV_DEVICE=m CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y -CONFIG_S390_AP_IOMMU=y CONFIG_S390_CCW_IOMMU=y +CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -519,6 +575,7 @@ CONFIG_OCFS2_FS=m CONFIG_BTRFS_FS=y CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_BTRFS_DEBUG=y +CONFIG_BTRFS_ASSERT=y CONFIG_NILFS2_FS=m CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y @@ -552,8 +609,10 @@ CONFIG_ECRYPT_FS=m CONFIG_CRAMFS=m CONFIG_SQUASHFS=m CONFIG_SQUASHFS_XATTR=y +CONFIG_SQUASHFS_LZ4=y CONFIG_SQUASHFS_LZO=y CONFIG_SQUASHFS_XZ=y +CONFIG_SQUASHFS_ZSTD=y CONFIG_ROMFS_FS=m CONFIG_NFS_FS=m CONFIG_NFS_V3_ACL=y @@ -564,7 +623,6 @@ CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y CONFIG_NFSD_V4_SECURITY_LABEL=y CONFIG_CIFS=m -CONFIG_CIFS_STATS=y CONFIG_CIFS_STATS2=y CONFIG_CIFS_WEAK_PW_HASH=y CONFIG_CIFS_UPCALL=y @@ -580,19 +638,112 @@ CONFIG_NLS_ISO8859_1=m CONFIG_NLS_ISO8859_15=m CONFIG_NLS_UTF8=m CONFIG_DLM=m +CONFIG_UNICODE=y +CONFIG_PERSISTENT_KEYRINGS=y +CONFIG_BIG_KEYS=y +CONFIG_ENCRYPTED_KEYS=m +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_FORTIFY_SOURCE=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_INTEGRITY_SIGNATURE=y +CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y +CONFIG_IMA=y +CONFIG_IMA_DEFAULT_HASH_SHA256=y +CONFIG_IMA_WRITE_POLICY=y +CONFIG_IMA_APPRAISE=y +CONFIG_CRYPTO_USER=m +# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +CONFIG_CRYPTO_PCRYPT=m +CONFIG_CRYPTO_CRYPTD=m +CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_AEGIS128=m +CONFIG_CRYPTO_AEGIS128L=m +CONFIG_CRYPTO_AEGIS256=m +CONFIG_CRYPTO_MORUS640=m +CONFIG_CRYPTO_MORUS1280=m +CONFIG_CRYPTO_CFB=m +CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_XCBC=m +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_CRC32=m +CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD128=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_RMD256=m +CONFIG_CRYPTO_RMD320=m +CONFIG_CRYPTO_SHA3=m +CONFIG_CRYPTO_SM3=m +CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_CAMELLIA=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_FCRYPT=m +CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_SALSA20=m +CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_SM4=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_842=m +CONFIG_CRYPTO_LZ4=m +CONFIG_CRYPTO_LZ4HC=m +CONFIG_CRYPTO_ZSTD=m +CONFIG_CRYPTO_ANSI_CPRNG=m +CONFIG_CRYPTO_USER_API_HASH=m +CONFIG_CRYPTO_USER_API_SKCIPHER=m +CONFIG_CRYPTO_USER_API_RNG=m +CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_STATS=y +CONFIG_ZCRYPT=m +CONFIG_PKEY=m +CONFIG_CRYPTO_PAES_S390=m +CONFIG_CRYPTO_SHA1_S390=m +CONFIG_CRYPTO_SHA256_S390=m +CONFIG_CRYPTO_SHA512_S390=m +CONFIG_CRYPTO_DES_S390=m +CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_GHASH_S390=m +CONFIG_CRYPTO_CRC32_S390=y +CONFIG_CORDIC=m +CONFIG_CRC32_SELFTEST=y +CONFIG_CRC4=m +CONFIG_CRC7=m +CONFIG_CRC8=m +CONFIG_RANDOM32_SELFTEST=y +CONFIG_DMA_CMA=y +CONFIG_CMA_SIZE_MBYTES=0 +CONFIG_DMA_API_DEBUG=y +CONFIG_STRING_SELFTEST=y CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_DWARF4=y CONFIG_GDB_SCRIPTS=y CONFIG_FRAME_WARN=1024 -CONFIG_READABLE_ASM=y CONFIG_UNUSED_SYMBOLS=y CONFIG_HEADERS_INSTALL=y CONFIG_HEADERS_CHECK=y CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_PAGEALLOC=y +CONFIG_PAGE_OWNER=y CONFIG_DEBUG_RODATA_TEST=y CONFIG_DEBUG_OBJECTS=y CONFIG_DEBUG_OBJECTS_SELFTEST=y @@ -645,7 +796,6 @@ CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y -CONFIG_DMA_API_DEBUG=y CONFIG_LKDTM=m CONFIG_TEST_LIST_SORT=y CONFIG_TEST_SORT=y @@ -657,85 +807,3 @@ CONFIG_ATOMIC64_SELFTEST=y CONFIG_TEST_BPF=m CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_S390_PTDUMP=y -CONFIG_PERSISTENT_KEYRINGS=y -CONFIG_BIG_KEYS=y -CONFIG_ENCRYPTED_KEYS=m -CONFIG_SECURITY=y -CONFIG_SECURITY_NETWORK=y -CONFIG_FORTIFY_SOURCE=y -CONFIG_SECURITY_SELINUX=y -CONFIG_SECURITY_SELINUX_BOOTPARAM=y -CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 -CONFIG_SECURITY_SELINUX_DISABLE=y -CONFIG_INTEGRITY_SIGNATURE=y -CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y -CONFIG_IMA=y -CONFIG_IMA_DEFAULT_HASH_SHA256=y -CONFIG_IMA_WRITE_POLICY=y -CONFIG_IMA_APPRAISE=y -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m -CONFIG_CRYPTO_USER=m -# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set -CONFIG_CRYPTO_PCRYPT=m -CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD128=m -CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m -CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_TGR192=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_AES_TI=m -CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAMELLIA=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_FCRYPT=m -CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m -CONFIG_CRYPTO_SEED=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_TEA=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_842=m -CONFIG_CRYPTO_LZ4=m -CONFIG_CRYPTO_LZ4HC=m -CONFIG_CRYPTO_ANSI_CPRNG=m -CONFIG_CRYPTO_USER_API_HASH=m -CONFIG_CRYPTO_USER_API_SKCIPHER=m -CONFIG_CRYPTO_USER_API_RNG=m -CONFIG_CRYPTO_USER_API_AEAD=m -CONFIG_ZCRYPT=m -CONFIG_PKEY=m -CONFIG_CRYPTO_PAES_S390=m -CONFIG_CRYPTO_SHA1_S390=m -CONFIG_CRYPTO_SHA256_S390=m -CONFIG_CRYPTO_SHA512_S390=m -CONFIG_CRYPTO_DES_S390=m -CONFIG_CRYPTO_AES_S390=m -CONFIG_CRYPTO_GHASH_S390=m -CONFIG_CRYPTO_CRC32_S390=y -CONFIG_PKCS7_MESSAGE_PARSER=y -CONFIG_SYSTEM_TRUSTED_KEYRING=y -CONFIG_CRC7=m -CONFIG_CRC8=m -CONFIG_RANDOM32_SELFTEST=y -CONFIG_CORDIC=m -CONFIG_CMM=m -CONFIG_APPLDATA_BASE=y -CONFIG_KVM=m -CONFIG_KVM_S390_UCONTROL=y -CONFIG_VHOST_NET=m -CONFIG_VHOST_VSOCK=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index e4bc40073003..68d3ca83302b 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -12,30 +12,51 @@ CONFIG_TASK_IO_ACCOUNTING=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_NUMA_BALANCING=y -# CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y CONFIG_BLK_CGROUP=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_RDMA=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_HUGETLB=y CONFIG_CPUSETS=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_BPF=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y +CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set -CONFIG_CHECKPOINT_RESTORE=y CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y +CONFIG_LIVEPATCH=y +CONFIG_TUNE_ZEC12=y +CONFIG_NR_CPUS=512 +CONFIG_NUMA=y +# CONFIG_NUMA_EMU is not set +CONFIG_HZ_100=y +CONFIG_KEXEC_FILE=y +CONFIG_EXPOLINE=y +CONFIG_EXPOLINE_AUTO=y +CONFIG_CHSC_SCH=y +CONFIG_VFIO_CCW=m +CONFIG_VFIO_AP=m +CONFIG_CRASH_DUMP=y +CONFIG_HIBERNATION=y +CONFIG_PM_DEBUG=y +CONFIG_CMM=m +CONFIG_APPLDATA_BASE=y +CONFIG_KVM=m +CONFIG_VHOST_NET=m +CONFIG_VHOST_VSOCK=m CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y @@ -47,27 +68,18 @@ CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_MODULE_SIG=y CONFIG_MODULE_SIG_SHA256=y -CONFIG_BLK_DEV_INTEGRITY=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y -CONFIG_BLK_WBT_SQ=y +CONFIG_BLK_CGROUP_IOLATENCY=y CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y CONFIG_BSD_DISKLABEL=y CONFIG_MINIX_SUBPARTITION=y CONFIG_SOLARIS_X86_PARTITION=y CONFIG_UNIXWARE_DISKLABEL=y -CONFIG_CFQ_GROUP_IOSCHED=y -CONFIG_DEFAULT_DEADLINE=y -CONFIG_LIVEPATCH=y -CONFIG_TUNE_ZEC12=y -CONFIG_NR_CPUS=512 -CONFIG_NUMA=y -CONFIG_HZ_100=y -CONFIG_KEXEC_FILE=y -CONFIG_KEXEC_VERIFY_SIG=y -CONFIG_EXPOLINE=y -CONFIG_EXPOLINE_AUTO=y +CONFIG_IOSCHED_BFQ=y +CONFIG_BFQ_GROUP_IOSCHED=y +CONFIG_BINFMT_MISC=m CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y @@ -81,16 +93,8 @@ CONFIG_ZSMALLOC=m CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y -CONFIG_PCI=y -CONFIG_HOTPLUG_PCI=y -CONFIG_HOTPLUG_PCI_S390=y -CONFIG_CHSC_SCH=y -CONFIG_VFIO_AP=m -CONFIG_VFIO_CCW=m -CONFIG_CRASH_DUMP=y -CONFIG_BINFMT_MISC=m -CONFIG_HIBERNATION=y -CONFIG_PM_DEBUG=y +CONFIG_PERCPU_STATS=y +CONFIG_GUP_BENCHMARK=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m @@ -119,9 +123,6 @@ CONFIG_NET_IPVTI=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_TCP_CONG_ADVANCED=y @@ -137,10 +138,6 @@ CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_MIP6=m -CONFIG_INET6_XFRM_MODE_TRANSPORT=m -CONFIG_INET6_XFRM_MODE_TUNNEL=m -CONFIG_INET6_XFRM_MODE_BEET=m -CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m CONFIG_IPV6_VTI=m CONFIG_IPV6_SIT=m CONFIG_IPV6_GRE=m @@ -262,11 +259,8 @@ CONFIG_IP_VS_SED=m CONFIG_IP_VS_NQ=m CONFIG_IP_VS_FTP=m CONFIG_IP_VS_PE_SIP=m -CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NF_TABLES_IPV4=y -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -285,10 +279,7 @@ CONFIG_IP_NF_SECURITY=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NF_TABLES_IPV6=y -CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -307,7 +298,7 @@ CONFIG_IP6_NF_RAW=m CONFIG_IP6_NF_SECURITY=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_NF_TABLES_BRIDGE=y +CONFIG_NF_TABLES_BRIDGE=m CONFIG_RDS=m CONFIG_RDS_RDMA=m CONFIG_RDS_TCP=m @@ -372,9 +363,11 @@ CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m +CONFIG_PCI=y +CONFIG_HOTPLUG_PCI=y +CONFIG_HOTPLUG_PCI_S390=y +CONFIG_UEVENT_HELPER=y CONFIG_DEVTMPFS=y -CONFIG_DMA_CMA=y -CONFIG_CMA_SIZE_MBYTES=0 CONFIG_CONNECTOR=y CONFIG_ZRAM=m CONFIG_BLK_DEV_LOOP=m @@ -383,6 +376,7 @@ CONFIG_BLK_DEV_DRBD=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 +# CONFIG_BLK_DEV_XPRAM is not set CONFIG_VIRTIO_BLK=y CONFIG_BLK_DEV_RBD=m CONFIG_BLK_DEV_NVME=m @@ -392,7 +386,6 @@ CONFIG_RAID_ATTRS=m CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m CONFIG_BLK_DEV_SR=m CONFIG_CHR_DEV_SG=y CONFIG_CHR_DEV_SCH=m @@ -412,17 +405,19 @@ CONFIG_SCSI_DH_RDAC=m CONFIG_SCSI_DH_HP_SW=m CONFIG_SCSI_DH_EMC=m CONFIG_SCSI_DH_ALUA=m -CONFIG_SCSI_OSD_INITIATOR=m -CONFIG_SCSI_OSD_ULD=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y CONFIG_MD_LINEAR=m CONFIG_MD_MULTIPATH=m CONFIG_MD_FAULTY=m +CONFIG_MD_CLUSTER=m +CONFIG_BCACHE=m CONFIG_BLK_DEV_DM=m +CONFIG_DM_UNSTRIPED=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m +CONFIG_DM_WRITECACHE=m CONFIG_DM_MIRROR=m CONFIG_DM_LOG_USERSPACE=m CONFIG_DM_RAID=m @@ -435,6 +430,7 @@ CONFIG_DM_UEVENT=y CONFIG_DM_FLAKEY=m CONFIG_DM_VERITY=m CONFIG_DM_SWITCH=m +CONFIG_DM_INTEGRITY=m CONFIG_NETDEVICES=y CONFIG_BONDING=m CONFIG_DUMMY=m @@ -442,23 +438,78 @@ CONFIG_EQUALIZER=m CONFIG_IFB=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m -CONFIG_VXLAN=m CONFIG_TUN=m CONFIG_VETH=m CONFIG_VIRTIO_NET=m CONFIG_NLMON=m +# CONFIG_NET_VENDOR_3COM is not set +# CONFIG_NET_VENDOR_ADAPTEC is not set +# CONFIG_NET_VENDOR_AGERE is not set +# CONFIG_NET_VENDOR_ALACRITECH is not set +# CONFIG_NET_VENDOR_ALTEON is not set +# CONFIG_NET_VENDOR_AMAZON is not set +# CONFIG_NET_VENDOR_AMD is not set +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_ATHEROS is not set +# CONFIG_NET_VENDOR_AURORA is not set +# CONFIG_NET_VENDOR_BROADCOM is not set +# CONFIG_NET_VENDOR_BROCADE is not set +# CONFIG_NET_VENDOR_CADENCE is not set +# CONFIG_NET_VENDOR_CAVIUM is not set # CONFIG_NET_VENDOR_CHELSIO is not set +# CONFIG_NET_VENDOR_CISCO is not set +# CONFIG_NET_VENDOR_CORTINA is not set +# CONFIG_NET_VENDOR_DEC is not set +# CONFIG_NET_VENDOR_DLINK is not set +# CONFIG_NET_VENDOR_EMULEX is not set +# CONFIG_NET_VENDOR_EZCHIP is not set +# CONFIG_NET_VENDOR_GOOGLE is not set +# CONFIG_NET_VENDOR_HP is not set +# CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m CONFIG_MLX5_CORE_EN=y +# CONFIG_MLXFW is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_MICROCHIP is not set +# CONFIG_NET_VENDOR_MICROSEMI is not set +# CONFIG_NET_VENDOR_MYRI is not set # CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_NETERION is not set +# CONFIG_NET_VENDOR_NETRONOME is not set +# CONFIG_NET_VENDOR_NI is not set +# CONFIG_NET_VENDOR_NVIDIA is not set +# CONFIG_NET_VENDOR_OKI is not set +# CONFIG_NET_VENDOR_PACKET_ENGINES is not set +# CONFIG_NET_VENDOR_QLOGIC is not set +# CONFIG_NET_VENDOR_QUALCOMM is not set +# CONFIG_NET_VENDOR_RDC is not set +# CONFIG_NET_VENDOR_REALTEK is not set +# CONFIG_NET_VENDOR_RENESAS is not set +# CONFIG_NET_VENDOR_ROCKER is not set +# CONFIG_NET_VENDOR_SAMSUNG is not set +# CONFIG_NET_VENDOR_SEEQ is not set +# CONFIG_NET_VENDOR_SOLARFLARE is not set +# CONFIG_NET_VENDOR_SILAN is not set +# CONFIG_NET_VENDOR_SIS is not set +# CONFIG_NET_VENDOR_SMSC is not set +# CONFIG_NET_VENDOR_SOCIONEXT is not set +# CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SUN is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set +# CONFIG_NET_VENDOR_TEHUTI is not set +# CONFIG_NET_VENDOR_TI is not set +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m CONFIG_PPP_DEFLATE=m +CONFIG_PPP_FILTER=y CONFIG_PPP_MPPE=m +CONFIG_PPP_MULTILINK=y CONFIG_PPPOE=m CONFIG_PPTP=m CONFIG_PPPOL2TP=m @@ -470,17 +521,21 @@ CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set CONFIG_LEGACY_PTY_COUNT=0 +CONFIG_NULL_TTY=m CONFIG_HW_RANDOM_VIRTIO=m CONFIG_RAW_DRIVER=m CONFIG_HANGCHECK_TIMER=m CONFIG_TN3270_FS=y +# CONFIG_PTP_1588_CLOCK is not set # CONFIG_HWMON is not set CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_CORE=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m CONFIG_DIAG288_WATCHDOG=m CONFIG_DRM=y CONFIG_DRM_VIRTIO_GPU=y +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y # CONFIG_HID is not set # CONFIG_USB_SUPPORT is not set @@ -495,8 +550,8 @@ CONFIG_VFIO_MDEV_DEVICE=m CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y -CONFIG_S390_AP_IOMMU=y CONFIG_S390_CCW_IOMMU=y +CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -546,8 +601,10 @@ CONFIG_ECRYPT_FS=m CONFIG_CRAMFS=m CONFIG_SQUASHFS=m CONFIG_SQUASHFS_XATTR=y +CONFIG_SQUASHFS_LZ4=y CONFIG_SQUASHFS_LZO=y CONFIG_SQUASHFS_XZ=y +CONFIG_SQUASHFS_ZSTD=y CONFIG_ROMFS_FS=m CONFIG_NFS_FS=m CONFIG_NFS_V3_ACL=y @@ -558,7 +615,6 @@ CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y CONFIG_NFSD_V4_SECURITY_LABEL=y CONFIG_CIFS=m -CONFIG_CIFS_STATS=y CONFIG_CIFS_STATS2=y CONFIG_CIFS_WEAK_PW_HASH=y CONFIG_CIFS_UPCALL=y @@ -574,13 +630,103 @@ CONFIG_NLS_ISO8859_1=m CONFIG_NLS_ISO8859_15=m CONFIG_NLS_UTF8=m CONFIG_DLM=m +CONFIG_UNICODE=y +CONFIG_PERSISTENT_KEYRINGS=y +CONFIG_BIG_KEYS=y +CONFIG_ENCRYPTED_KEYS=m +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_INTEGRITY_SIGNATURE=y +CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y +CONFIG_IMA=y +CONFIG_IMA_DEFAULT_HASH_SHA256=y +CONFIG_IMA_WRITE_POLICY=y +CONFIG_IMA_APPRAISE=y +CONFIG_CRYPTO_FIPS=y +CONFIG_CRYPTO_USER=m +# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +CONFIG_CRYPTO_PCRYPT=m +CONFIG_CRYPTO_CRYPTD=m +CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_AEGIS128=m +CONFIG_CRYPTO_AEGIS128L=m +CONFIG_CRYPTO_AEGIS256=m +CONFIG_CRYPTO_MORUS640=m +CONFIG_CRYPTO_MORUS1280=m +CONFIG_CRYPTO_CFB=m +CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_OFB=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_XCBC=m +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_CRC32=m +CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD128=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_RMD256=m +CONFIG_CRYPTO_RMD320=m +CONFIG_CRYPTO_SHA3=m +CONFIG_CRYPTO_SM3=m +CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_CAMELLIA=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_FCRYPT=m +CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_SALSA20=m +CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_SM4=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_842=m +CONFIG_CRYPTO_LZ4=m +CONFIG_CRYPTO_LZ4HC=m +CONFIG_CRYPTO_ZSTD=m +CONFIG_CRYPTO_ANSI_CPRNG=m +CONFIG_CRYPTO_USER_API_HASH=m +CONFIG_CRYPTO_USER_API_SKCIPHER=m +CONFIG_CRYPTO_USER_API_RNG=m +CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_STATS=y +CONFIG_ZCRYPT=m +CONFIG_PKEY=m +CONFIG_CRYPTO_PAES_S390=m +CONFIG_CRYPTO_SHA1_S390=m +CONFIG_CRYPTO_SHA256_S390=m +CONFIG_CRYPTO_SHA512_S390=m +CONFIG_CRYPTO_DES_S390=m +CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_GHASH_S390=m +CONFIG_CRYPTO_CRC32_S390=y +CONFIG_CORDIC=m +CONFIG_CRC4=m +CONFIG_CRC7=m +CONFIG_CRC8=m +CONFIG_DMA_CMA=y +CONFIG_CMA_SIZE_MBYTES=0 CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_DWARF4=y CONFIG_GDB_SCRIPTS=y -# CONFIG_ENABLE_MUST_CHECK is not set CONFIG_FRAME_WARN=1024 CONFIG_UNUSED_SYMBOLS=y +CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_PANIC_ON_OOPS=y @@ -599,82 +745,3 @@ CONFIG_ATOMIC64_SELFTEST=y CONFIG_TEST_BPF=m CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_S390_PTDUMP=y -CONFIG_PERSISTENT_KEYRINGS=y -CONFIG_BIG_KEYS=y -CONFIG_ENCRYPTED_KEYS=m -CONFIG_SECURITY=y -CONFIG_SECURITY_NETWORK=y -CONFIG_SECURITY_SELINUX=y -CONFIG_SECURITY_SELINUX_BOOTPARAM=y -CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 -CONFIG_SECURITY_SELINUX_DISABLE=y -CONFIG_INTEGRITY_SIGNATURE=y -CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y -CONFIG_IMA=y -CONFIG_IMA_DEFAULT_HASH_SHA256=y -CONFIG_IMA_WRITE_POLICY=y -CONFIG_IMA_APPRAISE=y -CONFIG_CRYPTO_FIPS=y -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m -CONFIG_CRYPTO_USER=m -# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set -CONFIG_CRYPTO_PCRYPT=m -CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD128=m -CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m -CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_TGR192=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_AES_TI=m -CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAMELLIA=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_FCRYPT=m -CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m -CONFIG_CRYPTO_SEED=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_TEA=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_842=m -CONFIG_CRYPTO_LZ4=m -CONFIG_CRYPTO_LZ4HC=m -CONFIG_CRYPTO_ANSI_CPRNG=m -CONFIG_CRYPTO_USER_API_HASH=m -CONFIG_CRYPTO_USER_API_SKCIPHER=m -CONFIG_CRYPTO_USER_API_RNG=m -CONFIG_CRYPTO_USER_API_AEAD=m -CONFIG_ZCRYPT=m -CONFIG_PKEY=m -CONFIG_CRYPTO_PAES_S390=m -CONFIG_CRYPTO_SHA1_S390=m -CONFIG_CRYPTO_SHA256_S390=m -CONFIG_CRYPTO_SHA512_S390=m -CONFIG_CRYPTO_DES_S390=m -CONFIG_CRYPTO_AES_S390=m -CONFIG_CRYPTO_GHASH_S390=m -CONFIG_CRYPTO_CRC32_S390=y -CONFIG_CRC7=m -CONFIG_CRC8=m -CONFIG_CORDIC=m -CONFIG_CMM=m -CONFIG_APPLDATA_BASE=y -CONFIG_KVM=m -CONFIG_KVM_S390_UCONTROL=y -CONFIG_VHOST_NET=m -CONFIG_VHOST_VSOCK=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index d92bab844b73..be09a208b608 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -1,27 +1,33 @@ # CONFIG_SWAP is not set CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +# CONFIG_CPU_ISOLATION is not set +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set +# CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_COMPAT_BRK is not set -CONFIG_PARTITION_ADVANCED=y -CONFIG_IBM_PARTITION=y -CONFIG_DEFAULT_DEADLINE=y CONFIG_TUNE_ZEC12=y # CONFIG_COMPAT is not set CONFIG_NR_CPUS=2 -# CONFIG_HOTPLUG_CPU is not set CONFIG_HZ_100=y # CONFIG_ARCH_RANDOM is not set -# CONFIG_COMPACTION is not set -# CONFIG_MIGRATION is not set -# CONFIG_BOUNCE is not set -# CONFIG_CHECK_STACK is not set +# CONFIG_RELOCATABLE is not set # CONFIG_CHSC_SCH is not set # CONFIG_SCM_BUS is not set CONFIG_CRASH_DUMP=y -# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_SECCOMP is not set +# CONFIG_PFAULT is not set +# CONFIG_S390_HYPFS_FS is not set +# CONFIG_VIRTUALIZATION is not set +# CONFIG_S390_GUEST is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_IBM_PARTITION=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +# CONFIG_COMPACTION is not set +# CONFIG_MIGRATION is not set +# CONFIG_BOUNCE is not set CONFIG_NET=y # CONFIG_IUCV is not set CONFIG_DEVTMPFS=y @@ -43,7 +49,6 @@ CONFIG_ZFCP=y # CONFIG_HVC_IUCV is not set # CONFIG_HW_RANDOM_S390 is not set CONFIG_RAW_DRIVER=y -# CONFIG_SCLP_ASYNC is not set # CONFIG_HMC_DRV is not set # CONFIG_S390_TAPE is not set # CONFIG_VMCP is not set @@ -56,6 +61,7 @@ CONFIG_RAW_DRIVER=y CONFIG_CONFIGFS_FS=y # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set +# CONFIG_DIMLIB is not set CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_FS=y @@ -64,7 +70,4 @@ CONFIG_PANIC_ON_OOPS=y # CONFIG_SCHED_DEBUG is not set CONFIG_RCU_CPU_STALL_TIMEOUT=60 # CONFIG_FTRACE is not set -# CONFIG_PFAULT is not set -# CONFIG_S390_HYPFS_FS is not set -# CONFIG_VIRTUALIZATION is not set -# CONFIG_S390_GUEST is not set +# CONFIG_RUNTIME_TESTING_MENU is not set From 868202ce15a7b52534257426a94f40b6d9f6150f Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 17 Jul 2019 19:38:42 +0200 Subject: [PATCH 110/277] s390/boot: add missing declarations and includes Add __swsusp_reset_dma declaration to avoid the following sparse warnings: arch/s390/kernel/setup.c:107:15: warning: symbol '__swsusp_reset_dma' was not declared. Should it be static? arch/s390/boot/startup.c:52:15: warning: symbol '__swsusp_reset_dma' was not declared. Should it be static? Add verify_facilities declaration to avoid the following sparse warning: arch/s390/boot/als.c:105:6: warning: symbol 'verify_facilities' was not declared. Should it be static? Include "boot.h" into arch/s390/boot/kaslr.c to expose get_random_base function declaration and avoid the following sparse warning: arch/s390/boot/kaslr.c:90:15: warning: symbol 'get_random_base' was not declared. Should it be static? Signed-off-by: Vasily Gorbik --- arch/s390/boot/boot.h | 1 + arch/s390/boot/kaslr.c | 1 + arch/s390/include/asm/setup.h | 1 + 3 files changed, 3 insertions(+) diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 082905d97309..1c3b2b257637 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -8,6 +8,7 @@ void store_ipl_parmblock(void); void setup_boot_command_line(void); void parse_boot_command_line(void); void setup_memory_end(void); +void verify_facilities(void); void print_missing_facilities(void); unsigned long get_random_base(unsigned long safe_addr); diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index 3bdd8132e56b..c34a6387ce38 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -7,6 +7,7 @@ #include #include #include "compressed/decompressor.h" +#include "boot.h" #define PRNG_MODE_TDES 1 #define PRNG_MODE_SHA512 2 diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index c5cfff7b1f91..70bd65724ec4 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -84,6 +84,7 @@ extern int noexec_disabled; extern int memory_end_set; extern unsigned long memory_end; extern unsigned long max_physmem_end; +extern unsigned long __swsusp_reset_dma; #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) From d25220d2f2ece9e516588ed5df2ed373069b3a02 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 17 Jul 2019 20:07:42 +0200 Subject: [PATCH 111/277] s390/lib: add missing include Include into arch/s390/lib/xor.c to expose xor_block_xc declaration and avoid the following sparse warning: arch/s390/lib/xor.c:128:27: warning: symbol 'xor_block_xc' was not declared. Should it be static? Signed-off-by: Vasily Gorbik --- arch/s390/lib/xor.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c index 96580590ccaf..29d9470dbceb 100644 --- a/arch/s390/lib/xor.c +++ b/arch/s390/lib/xor.c @@ -9,6 +9,7 @@ #include #include #include +#include static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) { From 06f9895fda39422fb9250a78454e69aadace13c7 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 17 Jul 2019 20:05:11 +0200 Subject: [PATCH 112/277] s390/perf: make cf_diag_csd static Since there is really no reason for cf_diag_csd per cpu variable to be globally visible make it static to avoid the following sparse warning: arch/s390/kernel/perf_cpum_cf_diag.c:37:1: warning: symbol 'cf_diag_csd' was not declared. Should it be static? Signed-off-by: Vasily Gorbik --- arch/s390/kernel/perf_cpum_cf_diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c index d4e031f7b9c8..5f1fd1581330 100644 --- a/arch/s390/kernel/perf_cpum_cf_diag.c +++ b/arch/s390/kernel/perf_cpum_cf_diag.c @@ -34,7 +34,7 @@ struct cf_diag_csd { /* Counter set data per CPU */ unsigned char start[PAGE_SIZE]; /* Counter set at event start */ unsigned char data[PAGE_SIZE]; /* Counter set at event delete */ }; -DEFINE_PER_CPU(struct cf_diag_csd, cf_diag_csd); +static DEFINE_PER_CPU(struct cf_diag_csd, cf_diag_csd); /* Counter sets are stored as data stream in a page sized memory buffer and * exported to user space via raw data attached to the event sample data. From 1877011a3568fcadf2ee28f4d02fe5c31b1bb060 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 17 Jul 2019 19:42:22 +0200 Subject: [PATCH 113/277] s390/kexec: add missing include to machine_kexec_reloc.c Include into machine_kexec_reloc.c to expose arch_kexec_do_relocs declaration and avoid the following sparse warnings: arch/s390/kernel/machine_kexec_reloc.c:4:5: warning: symbol 'arch_kexec_do_relocs' was not declared. Should it be static? arch/s390/boot/../kernel/machine_kexec_reloc.c:4:5: warning: symbol 'arch_kexec_do_relocs' was not declared. Should it be static? Signed-off-by: Vasily Gorbik --- arch/s390/kernel/machine_kexec_reloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/machine_kexec_reloc.c b/arch/s390/kernel/machine_kexec_reloc.c index 1dded39239f8..3b664cb3ec4d 100644 --- a/arch/s390/kernel/machine_kexec_reloc.c +++ b/arch/s390/kernel/machine_kexec_reloc.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val, unsigned long addr) From ffbd268506ca70c2cd18238762a29b9a47d8d9fa Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 17 Jul 2019 19:41:09 +0200 Subject: [PATCH 114/277] s390/mm: make gmap_test_and_clear_dirty_pmd static Since gmap_test_and_clear_dirty_pmd is not exported and has no reason to be globally visible make it static to avoid the following sparse warning: arch/s390/mm/gmap.c:2427:6: warning: symbol 'gmap_test_and_clear_dirty_pmd' was not declared. Should it be static? Signed-off-by: Vasily Gorbik --- arch/s390/mm/gmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 1e668b95e0c6..39c3a6e3d262 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2424,8 +2424,8 @@ EXPORT_SYMBOL_GPL(gmap_pmdp_idte_global); * This function is assumed to be called with the guest_table_lock * held. */ -bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp, - unsigned long gaddr) +static bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp, + unsigned long gaddr) { if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) return false; From 7f5aa1154b1a30653a12e0f7f473494de77ad670 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 29 Jul 2019 07:39:44 +0200 Subject: [PATCH 115/277] s390/mm: add fallthrough annotations Commit a035d552a93b ("Makefile: Globally enable fall-through warning") enables fall-through warnings globally. Add missing annotations. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/fault.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 63507662828f..7b0bb475c166 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -327,6 +327,7 @@ static noinline void do_fault_error(struct pt_regs *regs, int access, case VM_FAULT_BADACCESS: if (access == VM_EXEC && signal_return(regs) == 0) break; + /* fallthrough */ case VM_FAULT_BADMAP: /* Bad memory access. Check if it is kernel or user space. */ if (user_mode(regs)) { @@ -336,7 +337,9 @@ static noinline void do_fault_error(struct pt_regs *regs, int access, do_sigsegv(regs, si_code); break; } + /* fallthrough */ case VM_FAULT_BADCONTEXT: + /* fallthrough */ case VM_FAULT_PFAULT: do_no_context(regs); break; From 90a93ff4051ede8320c5576d99f34e0f75e99c1a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 29 Jul 2019 07:47:02 +0200 Subject: [PATCH 116/277] s390/tape: add fallthrough annotations Commit a035d552a93b ("Makefile: Globally enable fall-through warning") enables fall-through warnings globally. Add missing annotations. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- drivers/s390/char/tape_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/s390/char/tape_core.c b/drivers/s390/char/tape_core.c index 8d3370da2dfc..3e0b2f63a9d2 100644 --- a/drivers/s390/char/tape_core.c +++ b/drivers/s390/char/tape_core.c @@ -677,6 +677,7 @@ tape_generic_remove(struct ccw_device *cdev) switch (device->tape_state) { case TS_INIT: tape_state_set(device, TS_NOT_OPER); + /* fallthrough */ case TS_NOT_OPER: /* * Nothing to do. @@ -949,6 +950,7 @@ __tape_start_request(struct tape_device *device, struct tape_request *request) break; if (device->tape_state == TS_UNUSED) break; + /* fallthrough */ default: if (device->tape_state == TS_BLKUSE) break; @@ -1116,6 +1118,7 @@ __tape_do_irq (struct ccw_device *cdev, unsigned long intparm, struct irb *irb) case -ETIMEDOUT: DBF_LH(1, "(%08x): Request timed out\n", device->cdev_id); + /* fallthrough */ case -EIO: __tape_end_request(device, request, -EIO); break; From 943dd5fa70ada0266c3a572c641be537b69ae2a8 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 29 Jul 2019 01:01:21 +0200 Subject: [PATCH 117/277] s390/3215: add switch fall through comment for -Wimplicit-fallthrough Silence the following warning when built with -Wimplicit-fallthrough=3 enabled by default since 5.3-rc2: drivers/s390/char/con3215.c: In function 'raw3215_irq': drivers/s390/char/con3215.c:399:6: warning: this statement may fall through [-Wimplicit-fallthrough=] 399 | if (dstat == 0x08) | ^ drivers/s390/char/con3215.c:401:2: note: here 401 | case 0x04: | ^~~~ Signed-off-by: Vasily Gorbik --- drivers/s390/char/con3215.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index 8c9d412b6d33..e7cf0a1d4f71 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -398,6 +398,7 @@ static void raw3215_irq(struct ccw_device *cdev, unsigned long intparm, } if (dstat == 0x08) break; + /* else, fall through */ case 0x04: /* Device end interrupt. */ if ((raw = req->info) == NULL) From 8480657280ee769ad23101297e1e6be0f8d205ec Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 17 Jul 2019 20:09:28 +0200 Subject: [PATCH 118/277] vfio-ccw: make vfio_ccw_async_region_ops static Since vfio_ccw_async_region_ops is not exported and has no reason to be globally visible make it static to avoid the following sparse warning: drivers/s390/cio/vfio_ccw_async.c:73:30: warning: symbol 'vfio_ccw_async_region_ops' was not declared. Should it be static? Fixes: d5afd5d135c8 ("vfio-ccw: add handling for async channel instructions") Reviewed-by: Cornelia Huck Signed-off-by: Vasily Gorbik --- drivers/s390/cio/vfio_ccw_async.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/cio/vfio_ccw_async.c b/drivers/s390/cio/vfio_ccw_async.c index 8c1d2357ef5b..7a838e3d7c0f 100644 --- a/drivers/s390/cio/vfio_ccw_async.c +++ b/drivers/s390/cio/vfio_ccw_async.c @@ -70,7 +70,7 @@ static void vfio_ccw_async_region_release(struct vfio_ccw_private *private, } -const struct vfio_ccw_regops vfio_ccw_async_region_ops = { +static const struct vfio_ccw_regops vfio_ccw_async_region_ops = { .read = vfio_ccw_async_region_read, .write = vfio_ccw_async_region_write, .release = vfio_ccw_async_region_release, From a07fc0bb483eb24444cebd59a8112ce6e6964c48 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 24 Jul 2019 14:54:43 +0800 Subject: [PATCH 119/277] RDMA/hns: Fix build error If INFINIBAND_HNS_HIP08 is selected and HNS3 is m, but INFINIBAND_HNS is y, building fails: drivers/infiniband/hw/hns/hns_roce_hw_v2.o: In function `hns_roce_hw_v2_exit': hns_roce_hw_v2.c:(.exit.text+0xd): undefined reference to `hnae3_unregister_client' drivers/infiniband/hw/hns/hns_roce_hw_v2.o: In function `hns_roce_hw_v2_init': hns_roce_hw_v2.c:(.init.text+0xd): undefined reference to `hnae3_register_client' Also if INFINIBAND_HNS_HIP06 is selected and HNS_DSAF is m, but INFINIBAND_HNS is y, building fails: drivers/infiniband/hw/hns/hns_roce_hw_v1.o: In function `hns_roce_v1_reset': hns_roce_hw_v1.c:(.text+0x39fa): undefined reference to `hns_dsaf_roce_reset' hns_roce_hw_v1.c:(.text+0x3a25): undefined reference to `hns_dsaf_roce_reset' Reported-by: Hulk Robot Fixes: dd74282df573 ("RDMA/hns: Initialize the PCI device for hip08 RoCE") Fixes: 08805fdbeb2d ("RDMA/hns: Split hw v1 driver from hns roce driver") Signed-off-by: YueHaibing Link: https://lore.kernel.org/r/20190724065443.53068-1-yuehaibing@huawei.com Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/Kconfig | 6 +++--- drivers/infiniband/hw/hns/Makefile | 8 ++------ 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig index 8bf847bcd8d3..54782197c717 100644 --- a/drivers/infiniband/hw/hns/Kconfig +++ b/drivers/infiniband/hw/hns/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only config INFINIBAND_HNS - tristate "HNS RoCE Driver" + bool "HNS RoCE Driver" depends on NET_VENDOR_HISILICON depends on ARM64 || (COMPILE_TEST && 64BIT) ---help--- @@ -11,7 +11,7 @@ config INFINIBAND_HNS To compile HIP06 or HIP08 driver as module, choose M here. config INFINIBAND_HNS_HIP06 - bool "Hisilicon Hip06 Family RoCE support" + tristate "Hisilicon Hip06 Family RoCE support" depends on INFINIBAND_HNS && HNS && HNS_DSAF && HNS_ENET ---help--- RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip06 and @@ -21,7 +21,7 @@ config INFINIBAND_HNS_HIP06 module will be called hns-roce-hw-v1 config INFINIBAND_HNS_HIP08 - bool "Hisilicon Hip08 Family RoCE support" + tristate "Hisilicon Hip08 Family RoCE support" depends on INFINIBAND_HNS && PCI && HNS3 ---help--- RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip08 SoC. diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index e105945b94a1..449a2d81319d 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -9,12 +9,8 @@ hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o -ifdef CONFIG_INFINIBAND_HNS_HIP06 hns-roce-hw-v1-objs := hns_roce_hw_v1.o $(hns-roce-objs) -obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v1.o -endif +obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o -ifdef CONFIG_INFINIBAND_HNS_HIP08 hns-roce-hw-v2-objs := hns_roce_hw_v2.o hns_roce_hw_v2_dfx.o $(hns-roce-objs) -obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v2.o -endif +obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o From 61c30c98ef17e5a330d7bb8494b78b3d6dffe9b8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 29 Jul 2019 13:57:49 +0200 Subject: [PATCH 120/277] dax: Fix missed wakeup in put_unlocked_entry() The condition checking whether put_unlocked_entry() needs to wake up following waiter got broken by commit 23c84eb78375 ("dax: Fix missed wakeup with PMD faults"). We need to wake the waiter whenever the passed entry is valid (i.e., non-NULL and not special conflict entry). This could lead to processes never being woken up when waiting for entry lock. Fix the condition. Cc: Link: http://lore.kernel.org/r/20190729120228.GC17833@quack2.suse.cz Fixes: 23c84eb78375 ("dax: Fix missed wakeup with PMD faults") Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/dax.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dax.c b/fs/dax.c index a237141d8787..b64964ef44f6 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -266,7 +266,7 @@ static void wait_entry_unlocked(struct xa_state *xas, void *entry) static void put_unlocked_entry(struct xa_state *xas, void *entry) { /* If we were the only waiter woken, wake the next one */ - if (entry && dax_is_conflict(entry)) + if (entry && !dax_is_conflict(entry)) dax_wake_entry(xas, entry, false); } From 15fe6a8dcc3b48358c28e17b485fc837f9605ec4 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Sun, 28 Jul 2019 14:13:38 +0300 Subject: [PATCH 121/277] RDMA/qedr: Fix the hca_type and hca_rev returned in device attributes There was a place holder for hca_type and vendor was returned in hca_rev. Fix the hca_rev to return the hw revision and fix the hca_type to return an informative string representing the hca. Signed-off-by: Michal Kalderon Link: https://lore.kernel.org/r/20190728111338.21930-1-michal.kalderon@marvell.com Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/main.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 533157a2a3be..f97b3d65b30c 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -125,14 +125,20 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, struct qedr_dev *dev = rdma_device_to_drv_device(device, struct qedr_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->pdev->vendor); + return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->attr.hw_ver); } static DEVICE_ATTR_RO(hw_rev); static ssize_t hca_type_show(struct device *device, struct device_attribute *attr, char *buf) { - return scnprintf(buf, PAGE_SIZE, "%s\n", "HCA_TYPE_TO_SET"); + struct qedr_dev *dev = + rdma_device_to_drv_device(device, struct qedr_dev, ibdev); + + return scnprintf(buf, PAGE_SIZE, "FastLinQ QL%x %s\n", + dev->pdev->device, + rdma_protocol_iwarp(&dev->ibdev, 1) ? + "iWARP" : "RoCE"); } static DEVICE_ATTR_RO(hca_type); From 37151a41df800493cfcbbef4f7208ffe04feb959 Mon Sep 17 00:00:00 2001 From: Yuki Tsunashima Date: Mon, 29 Jul 2019 17:10:36 +0200 Subject: [PATCH 122/277] ALSA: pcm: fix lost wakeup event scenarios in snd_pcm_drain lost wakeup can occur after enabling irq, therefore put task into interruptible before enabling interrupts, without this change, task can be put to sleep and snd_pcm_drain will delay Fixes: f2b3614cefb6 ("ALSA: PCM - Don't check DMA time-out too shortly") Signed-off-by: Yuki Tsunashima Signed-off-by: Suresh Udipi [ported from 4.9] Signed-off-by: Adam Miartus Signed-off-by: Takashi Iwai --- sound/core/pcm_native.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 12dd9b318db1..703857aab00f 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -1873,6 +1873,7 @@ static int snd_pcm_drain(struct snd_pcm_substream *substream, if (!to_check) break; /* all drained */ init_waitqueue_entry(&wait, current); + set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&to_check->sleep, &wait); snd_pcm_stream_unlock_irq(substream); if (runtime->no_period_wakeup) @@ -1885,7 +1886,7 @@ static int snd_pcm_drain(struct snd_pcm_substream *substream, } tout = msecs_to_jiffies(tout * 1000); } - tout = schedule_timeout_interruptible(tout); + tout = schedule_timeout(tout); snd_pcm_stream_lock_irq(substream); group = snd_pcm_stream_group_ref(substream); From 708637e65abd487ebb75fb55401c36a466c3135b Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Sat, 27 Jul 2019 12:38:32 +0200 Subject: [PATCH 123/277] Do not dereference 'siw_crypto_shash' before checking Reported-by: "Dan Carpenter" Fixes: f29dd55b0236 ("rdma/siw: queue pair methods") Link: https://lore.kernel.org/r/OF61E386ED.49A73798-ON00258444.003BD6A6-00258444.003CC8D9@notes.na.collabserv.com Signed-off-by: Bernard Metzler Signed-off-by: Doug Ledford --- drivers/infiniband/sw/siw/siw_qp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index 11383d9f95ef..e27bd5b35b96 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -220,12 +220,14 @@ static int siw_qp_enable_crc(struct siw_qp *qp) { struct siw_rx_stream *c_rx = &qp->rx_stream; struct siw_iwarp_tx *c_tx = &qp->tx_ctx; - int size = crypto_shash_descsize(siw_crypto_shash) + - sizeof(struct shash_desc); + int size; if (siw_crypto_shash == NULL) return -ENOENT; + size = crypto_shash_descsize(siw_crypto_shash) + + sizeof(struct shash_desc); + c_tx->mpa_crc_hd = kzalloc(size, GFP_KERNEL); c_rx->mpa_crc_hd = kzalloc(size, GFP_KERNEL); if (!c_tx->mpa_crc_hd || !c_rx->mpa_crc_hd) { From 7be21763f703d0f7e878283ec31e52b225097449 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 29 Jul 2019 16:10:53 -0500 Subject: [PATCH 124/277] ataflop: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark switch cases where we are expecting to fall through. This patch fixes the following warning (Building: m68k): drivers/block/ataflop.c: In function ‘fd_locked_ioctl’: drivers/block/ataflop.c:1728:3: warning: this statement may fall through [-Wimplicit-fallthrough=] set_capacity(floppy->disk, MAX_DISK_SIZE * 2); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/block/ataflop.c:1729:2: note: here case FDFMTEND: ^~~~ Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jens Axboe --- drivers/block/ataflop.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 85f20e371f2f..bd7d3bb8b890 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1726,6 +1726,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, /* MSch: invalidate default_params */ default_params[drive].blocks = 0; set_capacity(floppy->disk, MAX_DISK_SIZE * 2); + /* Fall through */ case FDFMTEND: case FDFLUSH: /* invalidate the buffer track to force a reread */ From 71d6c505b4d9e6f76586350450e785e3d452b346 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 29 Jul 2019 14:47:22 -0700 Subject: [PATCH 125/277] libata: zpodd: Fix small read overflow in zpodd_get_mech_type() Jeffrin reported a KASAN issue: BUG: KASAN: global-out-of-bounds in ata_exec_internal_sg+0x50f/0xc70 Read of size 16 at addr ffffffff91f41f80 by task scsi_eh_1/149 ... The buggy address belongs to the variable: cdb.48319+0x0/0x40 Much like commit 18c9a99bce2a ("libata: zpodd: small read overflow in eject_tray()"), this fixes a cdb[] buffer length, this time in zpodd_get_mech_type(): We read from the cdb[] buffer in ata_exec_internal_sg(). It has to be ATAPI_CDB_LEN (16) bytes long, but this buffer is only 12 bytes. Reported-by: Jeffrin Jose T Fixes: afe759511808c ("libata: identify and init ZPODD devices") Link: https://lore.kernel.org/lkml/201907181423.E808958@keescook/ Tested-by: Jeffrin Jose T Reviewed-by: Nick Desaulniers Signed-off-by: Kees Cook Signed-off-by: Jens Axboe --- drivers/ata/libata-zpodd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-zpodd.c b/drivers/ata/libata-zpodd.c index 173e6f2dd9af..eefda51f97d3 100644 --- a/drivers/ata/libata-zpodd.c +++ b/drivers/ata/libata-zpodd.c @@ -56,7 +56,7 @@ static enum odd_mech_type zpodd_get_mech_type(struct ata_device *dev) unsigned int ret; struct rm_feature_desc *desc; struct ata_taskfile tf; - static const char cdb[] = { GPCMD_GET_CONFIGURATION, + static const char cdb[ATAPI_CDB_LEN] = { GPCMD_GET_CONFIGURATION, 2, /* only 1 feature descriptor requested */ 0, 3, /* 3, removable medium feature */ 0, 0, 0,/* reserved */ From f3e4ff28b8685d856f381ee6bcf88b6149a6db5b Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 24 Jul 2019 11:00:54 +0200 Subject: [PATCH 126/277] scsi: libfc: Whitespace cleanup in libfc.h No functional change. [mkp: typo] Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- include/scsi/libfc.h | 52 ++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index 2d64b53f947c..9b87e1a1c646 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -115,7 +115,7 @@ struct fc_disc_port { struct fc_lport *lp; struct list_head peers; struct work_struct rport_work; - u32 port_id; + u32 port_id; }; /** @@ -155,14 +155,14 @@ struct fc_rport_operations { */ struct fc_rport_libfc_priv { struct fc_lport *local_port; - enum fc_rport_state rp_state; + enum fc_rport_state rp_state; u16 flags; #define FC_RP_FLAGS_REC_SUPPORTED (1 << 0) #define FC_RP_FLAGS_RETRY (1 << 1) #define FC_RP_STARTED (1 << 2) #define FC_RP_FLAGS_CONF_REQ (1 << 3) - unsigned int e_d_tov; - unsigned int r_a_tov; + unsigned int e_d_tov; + unsigned int r_a_tov; }; /** @@ -191,24 +191,24 @@ struct fc_rport_priv { struct fc_lport *local_port; struct fc_rport *rport; struct kref kref; - enum fc_rport_state rp_state; + enum fc_rport_state rp_state; struct fc_rport_identifiers ids; u16 flags; - u16 max_seq; + u16 max_seq; u16 disc_id; u16 maxframe_size; - unsigned int retries; - unsigned int major_retries; - unsigned int e_d_tov; - unsigned int r_a_tov; - struct mutex rp_mutex; + unsigned int retries; + unsigned int major_retries; + unsigned int e_d_tov; + unsigned int r_a_tov; + struct mutex rp_mutex; struct delayed_work retry_work; - enum fc_rport_event event; + enum fc_rport_event event; struct fc_rport_operations *ops; - struct list_head peers; - struct work_struct event_work; + struct list_head peers; + struct work_struct event_work; u32 supported_classes; - u16 prli_count; + u16 prli_count; struct rcu_head rcu; u16 sp_features; u8 spp_type; @@ -618,12 +618,12 @@ struct libfc_function_template { * @disc_callback: Callback routine called when discovery completes */ struct fc_disc { - unsigned char retry_count; - unsigned char pending; - unsigned char requested; - unsigned short seq_count; - unsigned char buf_len; - u16 disc_id; + unsigned char retry_count; + unsigned char pending; + unsigned char requested; + unsigned short seq_count; + unsigned char buf_len; + u16 disc_id; struct list_head rports; void *priv; @@ -697,7 +697,7 @@ struct fc_lport { struct fc_rport_priv *ms_rdata; struct fc_rport_priv *ptp_rdata; void *scsi_priv; - struct fc_disc disc; + struct fc_disc disc; /* Virtual port information */ struct list_head vports; @@ -715,7 +715,7 @@ struct fc_lport { u8 retry_count; /* Fabric information */ - u32 port_id; + u32 port_id; u64 wwpn; u64 wwnn; unsigned int service_params; @@ -743,11 +743,11 @@ struct fc_lport { struct fc_ns_fts fcts; /* Miscellaneous */ - struct mutex lp_mutex; - struct list_head list; + struct mutex lp_mutex; + struct list_head list; struct delayed_work retry_work; void *prov[FC_FC4_PROV_SIZE]; - struct list_head lport_list; + struct list_head lport_list; }; /** From 023358b136d490ca91735ac6490db3741af5a8bd Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 24 Jul 2019 11:00:55 +0200 Subject: [PATCH 127/277] scsi: fcoe: Embed fc_rport_priv in fcoe_rport structure Gcc-9 complains for a memset across pointer boundaries, which happens as the code tries to allocate a flexible array on the stack. Turns out we cannot do this without relying on gcc-isms, so with this patch we'll embed the fc_rport_priv structure into fcoe_rport, can use the normal 'container_of' outcast, and will only have to do a memset over one structure. Signed-off-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/fcoe/fcoe_ctlr.c | 51 ++++++++++++++--------------------- drivers/scsi/libfc/fc_rport.c | 5 +++- include/scsi/libfcoe.h | 1 + 3 files changed, 25 insertions(+), 32 deletions(-) diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index 1a85fe9e4b7b..fc32b5d76821 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -2005,7 +2005,7 @@ EXPORT_SYMBOL_GPL(fcoe_wwn_from_mac); */ static inline struct fcoe_rport *fcoe_ctlr_rport(struct fc_rport_priv *rdata) { - return (struct fcoe_rport *)(rdata + 1); + return container_of(rdata, struct fcoe_rport, rdata); } /** @@ -2269,7 +2269,7 @@ static void fcoe_ctlr_vn_start(struct fcoe_ctlr *fip) */ static int fcoe_ctlr_vn_parse(struct fcoe_ctlr *fip, struct sk_buff *skb, - struct fc_rport_priv *rdata) + struct fcoe_rport *frport) { struct fip_header *fiph; struct fip_desc *desc = NULL; @@ -2277,16 +2277,12 @@ static int fcoe_ctlr_vn_parse(struct fcoe_ctlr *fip, struct fip_wwn_desc *wwn = NULL; struct fip_vn_desc *vn = NULL; struct fip_size_desc *size = NULL; - struct fcoe_rport *frport; size_t rlen; size_t dlen; u32 desc_mask = 0; u32 dtype; u8 sub; - memset(rdata, 0, sizeof(*rdata) + sizeof(*frport)); - frport = fcoe_ctlr_rport(rdata); - fiph = (struct fip_header *)skb->data; frport->flags = ntohs(fiph->fip_flags); @@ -2349,15 +2345,17 @@ static int fcoe_ctlr_vn_parse(struct fcoe_ctlr *fip, if (dlen != sizeof(struct fip_wwn_desc)) goto len_err; wwn = (struct fip_wwn_desc *)desc; - rdata->ids.node_name = get_unaligned_be64(&wwn->fd_wwn); + frport->rdata.ids.node_name = + get_unaligned_be64(&wwn->fd_wwn); break; case FIP_DT_VN_ID: if (dlen != sizeof(struct fip_vn_desc)) goto len_err; vn = (struct fip_vn_desc *)desc; memcpy(frport->vn_mac, vn->fd_mac, ETH_ALEN); - rdata->ids.port_id = ntoh24(vn->fd_fc_id); - rdata->ids.port_name = get_unaligned_be64(&vn->fd_wwpn); + frport->rdata.ids.port_id = ntoh24(vn->fd_fc_id); + frport->rdata.ids.port_name = + get_unaligned_be64(&vn->fd_wwpn); break; case FIP_DT_FC4F: if (dlen != sizeof(struct fip_fc4_feat)) @@ -2738,10 +2736,7 @@ static int fcoe_ctlr_vn_recv(struct fcoe_ctlr *fip, struct sk_buff *skb) { struct fip_header *fiph; enum fip_vn2vn_subcode sub; - struct { - struct fc_rport_priv rdata; - struct fcoe_rport frport; - } buf; + struct fcoe_rport frport = { }; int rc, vlan_id = 0; fiph = (struct fip_header *)skb->data; @@ -2757,7 +2752,7 @@ static int fcoe_ctlr_vn_recv(struct fcoe_ctlr *fip, struct sk_buff *skb) goto drop; } - rc = fcoe_ctlr_vn_parse(fip, skb, &buf.rdata); + rc = fcoe_ctlr_vn_parse(fip, skb, &frport); if (rc) { LIBFCOE_FIP_DBG(fip, "vn_recv vn_parse error %d\n", rc); goto drop; @@ -2766,19 +2761,19 @@ static int fcoe_ctlr_vn_recv(struct fcoe_ctlr *fip, struct sk_buff *skb) mutex_lock(&fip->ctlr_mutex); switch (sub) { case FIP_SC_VN_PROBE_REQ: - fcoe_ctlr_vn_probe_req(fip, &buf.rdata); + fcoe_ctlr_vn_probe_req(fip, &frport.rdata); break; case FIP_SC_VN_PROBE_REP: - fcoe_ctlr_vn_probe_reply(fip, &buf.rdata); + fcoe_ctlr_vn_probe_reply(fip, &frport.rdata); break; case FIP_SC_VN_CLAIM_NOTIFY: - fcoe_ctlr_vn_claim_notify(fip, &buf.rdata); + fcoe_ctlr_vn_claim_notify(fip, &frport.rdata); break; case FIP_SC_VN_CLAIM_REP: - fcoe_ctlr_vn_claim_resp(fip, &buf.rdata); + fcoe_ctlr_vn_claim_resp(fip, &frport.rdata); break; case FIP_SC_VN_BEACON: - fcoe_ctlr_vn_beacon(fip, &buf.rdata); + fcoe_ctlr_vn_beacon(fip, &frport.rdata); break; default: LIBFCOE_FIP_DBG(fip, "vn_recv unknown subcode %d\n", sub); @@ -2802,22 +2797,18 @@ drop: */ static int fcoe_ctlr_vlan_parse(struct fcoe_ctlr *fip, struct sk_buff *skb, - struct fc_rport_priv *rdata) + struct fcoe_rport *frport) { struct fip_header *fiph; struct fip_desc *desc = NULL; struct fip_mac_desc *macd = NULL; struct fip_wwn_desc *wwn = NULL; - struct fcoe_rport *frport; size_t rlen; size_t dlen; u32 desc_mask = 0; u32 dtype; u8 sub; - memset(rdata, 0, sizeof(*rdata) + sizeof(*frport)); - frport = fcoe_ctlr_rport(rdata); - fiph = (struct fip_header *)skb->data; frport->flags = ntohs(fiph->fip_flags); @@ -2871,7 +2862,8 @@ static int fcoe_ctlr_vlan_parse(struct fcoe_ctlr *fip, if (dlen != sizeof(struct fip_wwn_desc)) goto len_err; wwn = (struct fip_wwn_desc *)desc; - rdata->ids.node_name = get_unaligned_be64(&wwn->fd_wwn); + frport->rdata.ids.node_name = + get_unaligned_be64(&wwn->fd_wwn); break; default: LIBFCOE_FIP_DBG(fip, "unexpected descriptor type %x " @@ -2982,22 +2974,19 @@ static int fcoe_ctlr_vlan_recv(struct fcoe_ctlr *fip, struct sk_buff *skb) { struct fip_header *fiph; enum fip_vlan_subcode sub; - struct { - struct fc_rport_priv rdata; - struct fcoe_rport frport; - } buf; + struct fcoe_rport frport = { }; int rc; fiph = (struct fip_header *)skb->data; sub = fiph->fip_subcode; - rc = fcoe_ctlr_vlan_parse(fip, skb, &buf.rdata); + rc = fcoe_ctlr_vlan_parse(fip, skb, &frport); if (rc) { LIBFCOE_FIP_DBG(fip, "vlan_recv vlan_parse error %d\n", rc); goto drop; } mutex_lock(&fip->ctlr_mutex); if (sub == FIP_SC_VL_REQ) - fcoe_ctlr_vlan_disc_reply(fip, &buf.rdata); + fcoe_ctlr_vlan_disc_reply(fip, &frport.rdata); mutex_unlock(&fip->ctlr_mutex); drop: diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index e0f3852fdad1..da6e97d8dc3b 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -128,6 +128,7 @@ EXPORT_SYMBOL(fc_rport_lookup); struct fc_rport_priv *fc_rport_create(struct fc_lport *lport, u32 port_id) { struct fc_rport_priv *rdata; + size_t rport_priv_size = sizeof(*rdata); lockdep_assert_held(&lport->disc.disc_mutex); @@ -135,7 +136,9 @@ struct fc_rport_priv *fc_rport_create(struct fc_lport *lport, u32 port_id) if (rdata) return rdata; - rdata = kzalloc(sizeof(*rdata) + lport->rport_priv_size, GFP_KERNEL); + if (lport->rport_priv_size > 0) + rport_priv_size = lport->rport_priv_size; + rdata = kzalloc(rport_priv_size, GFP_KERNEL); if (!rdata) return NULL; diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h index dc14b52577f7..2568cb0627ec 100644 --- a/include/scsi/libfcoe.h +++ b/include/scsi/libfcoe.h @@ -229,6 +229,7 @@ struct fcoe_fcf { * @vn_mac: VN_Node assigned MAC address for data */ struct fcoe_rport { + struct fc_rport_priv rdata; unsigned long time; u16 fcoe_len; u16 flags; From d478418703d6bcdd163d5d8127683c6c471539f0 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 24 Jul 2019 11:00:56 +0200 Subject: [PATCH 128/277] scsi: fcoe: pass in fcoe_rport structure instead of fc_rport_priv Instead of using the generic 'fc_rport_priv' structure as argument and then having to painstakingly outcast this to fcoe_rport we should be passing the fcoe_rport structure itself and reduce complexity. Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/fcoe/fcoe_ctlr.c | 99 ++++++++++++++++++----------------- 1 file changed, 51 insertions(+), 48 deletions(-) diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index fc32b5d76821..1791a393795d 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -2401,16 +2401,14 @@ static void fcoe_ctlr_vn_send_claim(struct fcoe_ctlr *fip) /** * fcoe_ctlr_vn_probe_req() - handle incoming VN2VN probe request. * @fip: The FCoE controller - * @rdata: parsed remote port with frport from the probe request + * @frport: parsed FCoE rport from the probe request * * Called with ctlr_mutex held. */ static void fcoe_ctlr_vn_probe_req(struct fcoe_ctlr *fip, - struct fc_rport_priv *rdata) + struct fcoe_rport *frport) { - struct fcoe_rport *frport = fcoe_ctlr_rport(rdata); - - if (rdata->ids.port_id != fip->port_id) + if (frport->rdata.ids.port_id != fip->port_id) return; switch (fip->state) { @@ -2430,7 +2428,7 @@ static void fcoe_ctlr_vn_probe_req(struct fcoe_ctlr *fip, * Probe's REC bit is not set. * If we don't reply, we will change our address. */ - if (fip->lp->wwpn > rdata->ids.port_name && + if (fip->lp->wwpn > frport->rdata.ids.port_name && !(frport->flags & FIP_FL_REC_OR_P2P)) { LIBFCOE_FIP_DBG(fip, "vn_probe_req: " "port_id collision\n"); @@ -2454,14 +2452,14 @@ static void fcoe_ctlr_vn_probe_req(struct fcoe_ctlr *fip, /** * fcoe_ctlr_vn_probe_reply() - handle incoming VN2VN probe reply. * @fip: The FCoE controller - * @rdata: parsed remote port with frport from the probe request + * @frport: parsed FCoE rport from the probe request * * Called with ctlr_mutex held. */ static void fcoe_ctlr_vn_probe_reply(struct fcoe_ctlr *fip, - struct fc_rport_priv *rdata) + struct fcoe_rport *frport) { - if (rdata->ids.port_id != fip->port_id) + if (frport->rdata.ids.port_id != fip->port_id) return; switch (fip->state) { case FIP_ST_VNMP_START: @@ -2484,11 +2482,11 @@ static void fcoe_ctlr_vn_probe_reply(struct fcoe_ctlr *fip, /** * fcoe_ctlr_vn_add() - Add a VN2VN entry to the list, based on a claim reply. * @fip: The FCoE controller - * @new: newly-parsed remote port with frport as a template for new rdata + * @new: newly-parsed FCoE rport as a template for new rdata * * Called with ctlr_mutex held. */ -static void fcoe_ctlr_vn_add(struct fcoe_ctlr *fip, struct fc_rport_priv *new) +static void fcoe_ctlr_vn_add(struct fcoe_ctlr *fip, struct fcoe_rport *new) { struct fc_lport *lport = fip->lp; struct fc_rport_priv *rdata; @@ -2496,7 +2494,7 @@ static void fcoe_ctlr_vn_add(struct fcoe_ctlr *fip, struct fc_rport_priv *new) struct fcoe_rport *frport; u32 port_id; - port_id = new->ids.port_id; + port_id = new->rdata.ids.port_id; if (port_id == fip->port_id) return; @@ -2513,22 +2511,28 @@ static void fcoe_ctlr_vn_add(struct fcoe_ctlr *fip, struct fc_rport_priv *new) rdata->disc_id = lport->disc.disc_id; ids = &rdata->ids; - if ((ids->port_name != -1 && ids->port_name != new->ids.port_name) || - (ids->node_name != -1 && ids->node_name != new->ids.node_name)) { + if ((ids->port_name != -1 && + ids->port_name != new->rdata.ids.port_name) || + (ids->node_name != -1 && + ids->node_name != new->rdata.ids.node_name)) { mutex_unlock(&rdata->rp_mutex); LIBFCOE_FIP_DBG(fip, "vn_add rport logoff %6.6x\n", port_id); fc_rport_logoff(rdata); mutex_lock(&rdata->rp_mutex); } - ids->port_name = new->ids.port_name; - ids->node_name = new->ids.node_name; + ids->port_name = new->rdata.ids.port_name; + ids->node_name = new->rdata.ids.node_name; mutex_unlock(&rdata->rp_mutex); frport = fcoe_ctlr_rport(rdata); LIBFCOE_FIP_DBG(fip, "vn_add rport %6.6x %s state %d\n", port_id, frport->fcoe_len ? "old" : "new", rdata->rp_state); - *frport = *fcoe_ctlr_rport(new); + frport->fcoe_len = new->fcoe_len; + frport->flags = new->flags; + frport->login_count = new->login_count; + memcpy(frport->enode_mac, new->enode_mac, ETH_ALEN); + memcpy(frport->vn_mac, new->vn_mac, ETH_ALEN); frport->time = 0; } @@ -2560,16 +2564,14 @@ static int fcoe_ctlr_vn_lookup(struct fcoe_ctlr *fip, u32 port_id, u8 *mac) /** * fcoe_ctlr_vn_claim_notify() - handle received FIP VN2VN Claim Notification * @fip: The FCoE controller - * @new: newly-parsed remote port with frport as a template for new rdata + * @new: newly-parsed FCoE rport as a template for new rdata * * Called with ctlr_mutex held. */ static void fcoe_ctlr_vn_claim_notify(struct fcoe_ctlr *fip, - struct fc_rport_priv *new) + struct fcoe_rport *new) { - struct fcoe_rport *frport = fcoe_ctlr_rport(new); - - if (frport->flags & FIP_FL_REC_OR_P2P) { + if (new->flags & FIP_FL_REC_OR_P2P) { LIBFCOE_FIP_DBG(fip, "send probe req for P2P/REC\n"); fcoe_ctlr_vn_send(fip, FIP_SC_VN_PROBE_REQ, fcoe_all_vn2vn, 0); return; @@ -2578,7 +2580,7 @@ static void fcoe_ctlr_vn_claim_notify(struct fcoe_ctlr *fip, case FIP_ST_VNMP_START: case FIP_ST_VNMP_PROBE1: case FIP_ST_VNMP_PROBE2: - if (new->ids.port_id == fip->port_id) { + if (new->rdata.ids.port_id == fip->port_id) { LIBFCOE_FIP_DBG(fip, "vn_claim_notify: " "restart, state %d\n", fip->state); @@ -2587,8 +2589,8 @@ static void fcoe_ctlr_vn_claim_notify(struct fcoe_ctlr *fip, break; case FIP_ST_VNMP_CLAIM: case FIP_ST_VNMP_UP: - if (new->ids.port_id == fip->port_id) { - if (new->ids.port_name > fip->lp->wwpn) { + if (new->rdata.ids.port_id == fip->port_id) { + if (new->rdata.ids.port_name > fip->lp->wwpn) { LIBFCOE_FIP_DBG(fip, "vn_claim_notify: " "restart, port_id collision\n"); fcoe_ctlr_vn_restart(fip); @@ -2600,15 +2602,16 @@ static void fcoe_ctlr_vn_claim_notify(struct fcoe_ctlr *fip, break; } LIBFCOE_FIP_DBG(fip, "vn_claim_notify: send reply to %x\n", - new->ids.port_id); - fcoe_ctlr_vn_send(fip, FIP_SC_VN_CLAIM_REP, frport->enode_mac, - min((u32)frport->fcoe_len, + new->rdata.ids.port_id); + fcoe_ctlr_vn_send(fip, FIP_SC_VN_CLAIM_REP, new->enode_mac, + min((u32)new->fcoe_len, fcoe_ctlr_fcoe_size(fip))); fcoe_ctlr_vn_add(fip, new); break; default: LIBFCOE_FIP_DBG(fip, "vn_claim_notify: " - "ignoring claim from %x\n", new->ids.port_id); + "ignoring claim from %x\n", + new->rdata.ids.port_id); break; } } @@ -2616,15 +2619,15 @@ static void fcoe_ctlr_vn_claim_notify(struct fcoe_ctlr *fip, /** * fcoe_ctlr_vn_claim_resp() - handle received Claim Response * @fip: The FCoE controller that received the frame - * @new: newly-parsed remote port with frport from the Claim Response + * @new: newly-parsed FCoE rport from the Claim Response * * Called with ctlr_mutex held. */ static void fcoe_ctlr_vn_claim_resp(struct fcoe_ctlr *fip, - struct fc_rport_priv *new) + struct fcoe_rport *new) { LIBFCOE_FIP_DBG(fip, "claim resp from from rport %x - state %s\n", - new->ids.port_id, fcoe_ctlr_state(fip->state)); + new->rdata.ids.port_id, fcoe_ctlr_state(fip->state)); if (fip->state == FIP_ST_VNMP_UP || fip->state == FIP_ST_VNMP_CLAIM) fcoe_ctlr_vn_add(fip, new); } @@ -2632,28 +2635,28 @@ static void fcoe_ctlr_vn_claim_resp(struct fcoe_ctlr *fip, /** * fcoe_ctlr_vn_beacon() - handle received beacon. * @fip: The FCoE controller that received the frame - * @new: newly-parsed remote port with frport from the Beacon + * @new: newly-parsed FCoE rport from the Beacon * * Called with ctlr_mutex held. */ static void fcoe_ctlr_vn_beacon(struct fcoe_ctlr *fip, - struct fc_rport_priv *new) + struct fcoe_rport *new) { struct fc_lport *lport = fip->lp; struct fc_rport_priv *rdata; struct fcoe_rport *frport; - frport = fcoe_ctlr_rport(new); - if (frport->flags & FIP_FL_REC_OR_P2P) { + if (new->flags & FIP_FL_REC_OR_P2P) { LIBFCOE_FIP_DBG(fip, "p2p beacon while in vn2vn mode\n"); fcoe_ctlr_vn_send(fip, FIP_SC_VN_PROBE_REQ, fcoe_all_vn2vn, 0); return; } - rdata = fc_rport_lookup(lport, new->ids.port_id); + rdata = fc_rport_lookup(lport, new->rdata.ids.port_id); if (rdata) { - if (rdata->ids.node_name == new->ids.node_name && - rdata->ids.port_name == new->ids.port_name) { + if (rdata->ids.node_name == new->rdata.ids.node_name && + rdata->ids.port_name == new->rdata.ids.port_name) { frport = fcoe_ctlr_rport(rdata); + LIBFCOE_FIP_DBG(fip, "beacon from rport %x\n", rdata->ids.port_id); if (!frport->time && fip->state == FIP_ST_VNMP_UP) { @@ -2676,7 +2679,7 @@ static void fcoe_ctlr_vn_beacon(struct fcoe_ctlr *fip, * Don't add the neighbor yet. */ LIBFCOE_FIP_DBG(fip, "beacon from new rport %x. sending claim notify\n", - new->ids.port_id); + new->rdata.ids.port_id); if (time_after(jiffies, fip->sol_time + msecs_to_jiffies(FIP_VN_ANN_WAIT))) fcoe_ctlr_vn_send_claim(fip); @@ -2761,19 +2764,19 @@ static int fcoe_ctlr_vn_recv(struct fcoe_ctlr *fip, struct sk_buff *skb) mutex_lock(&fip->ctlr_mutex); switch (sub) { case FIP_SC_VN_PROBE_REQ: - fcoe_ctlr_vn_probe_req(fip, &frport.rdata); + fcoe_ctlr_vn_probe_req(fip, &frport); break; case FIP_SC_VN_PROBE_REP: - fcoe_ctlr_vn_probe_reply(fip, &frport.rdata); + fcoe_ctlr_vn_probe_reply(fip, &frport); break; case FIP_SC_VN_CLAIM_NOTIFY: - fcoe_ctlr_vn_claim_notify(fip, &frport.rdata); + fcoe_ctlr_vn_claim_notify(fip, &frport); break; case FIP_SC_VN_CLAIM_REP: - fcoe_ctlr_vn_claim_resp(fip, &frport.rdata); + fcoe_ctlr_vn_claim_resp(fip, &frport); break; case FIP_SC_VN_BEACON: - fcoe_ctlr_vn_beacon(fip, &frport.rdata); + fcoe_ctlr_vn_beacon(fip, &frport); break; default: LIBFCOE_FIP_DBG(fip, "vn_recv unknown subcode %d\n", sub); @@ -2949,13 +2952,13 @@ static void fcoe_ctlr_vlan_send(struct fcoe_ctlr *fip, /** * fcoe_ctlr_vlan_disk_reply() - send FIP VLAN Discovery Notification. * @fip: The FCoE controller + * @frport: The newly-parsed FCoE rport from the Discovery Request * * Called with ctlr_mutex held. */ static void fcoe_ctlr_vlan_disc_reply(struct fcoe_ctlr *fip, - struct fc_rport_priv *rdata) + struct fcoe_rport *frport) { - struct fcoe_rport *frport = fcoe_ctlr_rport(rdata); enum fip_vlan_subcode sub = FIP_SC_VL_NOTE; if (fip->mode == FIP_MODE_VN2VN) @@ -2986,7 +2989,7 @@ static int fcoe_ctlr_vlan_recv(struct fcoe_ctlr *fip, struct sk_buff *skb) } mutex_lock(&fip->ctlr_mutex); if (sub == FIP_SC_VL_REQ) - fcoe_ctlr_vlan_disc_reply(fip, &frport.rdata); + fcoe_ctlr_vlan_disc_reply(fip, &frport); mutex_unlock(&fip->ctlr_mutex); drop: From c00f9c6b79f7e1c5caf774c38e9fd5dad2d2ef1c Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Wed, 12 Jun 2019 11:17:46 +0800 Subject: [PATCH 129/277] drm/i915/gvt: remove duplicate include of trace.h This removes duplicate include of trace.h. Found by Hariprasad Kelam with includecheck. Reported-by: Hariprasad Kelam Reviewed-by: Yan Zhao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/trace_points.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/trace_points.c b/drivers/gpu/drm/i915/gvt/trace_points.c index a3deed692b9c..fe552e877e09 100644 --- a/drivers/gpu/drm/i915/gvt/trace_points.c +++ b/drivers/gpu/drm/i915/gvt/trace_points.c @@ -28,8 +28,6 @@ * */ -#include "trace.h" - #ifndef __CHECKER__ #define CREATE_TRACE_POINTS #include "trace.h" From d18fd0576e05a4b03b588e131093b0437fccb75f Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Mon, 27 May 2019 13:45:51 +0800 Subject: [PATCH 130/277] drm/i915/gvt: Warning for invalid ggtt access Instead of silently return virtual ggtt entries that guest is allowed to access, this patch add extra range check. If guest read out of range, it will print a warning and return 0. If guest write out of range, the write will be dropped without any message. Reviewed-by: Zhenyu Wang Signed-off-by: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 53115bdae12b..4b04af569c05 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -2141,11 +2141,20 @@ static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm; const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; unsigned long index = off >> info->gtt_entry_size_shift; + unsigned long gma; struct intel_gvt_gtt_entry e; if (bytes != 4 && bytes != 8) return -EINVAL; + gma = index << I915_GTT_PAGE_SHIFT; + if (!intel_gvt_ggtt_validate_range(vgpu, + gma, 1 << I915_GTT_PAGE_SHIFT)) { + gvt_dbg_mm("read invalid ggtt at 0x%lx\n", gma); + memset(p_data, 0, bytes); + return 0; + } + ggtt_get_guest_entry(ggtt_mm, &e, index); memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)), bytes); From c25144098bee19b089c8a37c54517bf467f06403 Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Mon, 27 May 2019 13:45:52 +0800 Subject: [PATCH 131/277] drm/i915/gvt: Don't use ggtt_validdate_range() with size=0 Use vgpu_gmadr_is_valid() directly instead. Reviewed-by: Zhenyu Wang Signed-off-by: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/fb_decoder.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.c b/drivers/gpu/drm/i915/gvt/fb_decoder.c index 65e847392aea..8bb292b01271 100644 --- a/drivers/gpu/drm/i915/gvt/fb_decoder.c +++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c @@ -245,7 +245,7 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, plane->hw_format = fmt; plane->base = vgpu_vreg_t(vgpu, DSPSURF(pipe)) & I915_GTT_PAGE_MASK; - if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) + if (!vgpu_gmadr_is_valid(vgpu, plane->base)) return -EINVAL; plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); @@ -368,7 +368,7 @@ int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu, alpha_plane, alpha_force); plane->base = vgpu_vreg_t(vgpu, CURBASE(pipe)) & I915_GTT_PAGE_MASK; - if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) + if (!vgpu_gmadr_is_valid(vgpu, plane->base)) return -EINVAL; plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); @@ -472,7 +472,7 @@ int intel_vgpu_decode_sprite_plane(struct intel_vgpu *vgpu, plane->drm_format = drm_format; plane->base = vgpu_vreg_t(vgpu, SPRSURF(pipe)) & I915_GTT_PAGE_MASK; - if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) + if (!vgpu_gmadr_is_valid(vgpu, plane->base)) return -EINVAL; plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); From 2089a76ade9005a06c5e08e8454f45f3625fdc1c Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Mon, 27 May 2019 13:45:53 +0800 Subject: [PATCH 132/277] drm/i915/gvt: Checking workload's gma earlier Workload contains RB and WA_CTX which are in ggtt space, if they aren't in valid ggtt space, the workload shouldn't be shadowed and scanned. So checking them earlier to avoid shadow them. Reviewed-by: Zhenyu Wang Signed-off-by: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 10 ---------- drivers/gpu/drm/i915/gvt/scheduler.c | 28 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 6ea88270c818..b09dc315e2da 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -2674,11 +2674,6 @@ static int scan_workload(struct intel_vgpu_workload *workload) gma_head == gma_tail) return 0; - if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) { - ret = -EINVAL; - goto out; - } - ret = ip_gma_set(&s, gma_head); if (ret) goto out; @@ -2724,11 +2719,6 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) s.workload = workload; s.is_ctx_wa = true; - if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) { - ret = -EINVAL; - goto out; - } - ret = ip_gma_set(&s, gma_head); if (ret) goto out; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 2144fb46d0e1..6469366c1753 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -1492,6 +1492,12 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4); + if (!intel_gvt_ggtt_validate_range(vgpu, start, + _RING_CTL_BUF_SIZE(ctl))) { + gvt_vgpu_err("context contain invalid rb at: 0x%x\n", start); + return ERR_PTR(-EINVAL); + } + workload = alloc_workload(vgpu); if (IS_ERR(workload)) return workload; @@ -1516,9 +1522,31 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, workload->wa_ctx.indirect_ctx.size = (indirect_ctx & INDIRECT_CTX_SIZE_MASK) * CACHELINE_BYTES; + + if (workload->wa_ctx.indirect_ctx.size != 0) { + if (!intel_gvt_ggtt_validate_range(vgpu, + workload->wa_ctx.indirect_ctx.guest_gma, + workload->wa_ctx.indirect_ctx.size)) { + kmem_cache_free(s->workloads, workload); + gvt_vgpu_err("invalid wa_ctx at: 0x%lx\n", + workload->wa_ctx.indirect_ctx.guest_gma); + return ERR_PTR(-EINVAL); + } + } + workload->wa_ctx.per_ctx.guest_gma = per_ctx & PER_CTX_ADDR_MASK; workload->wa_ctx.per_ctx.valid = per_ctx & 1; + if (workload->wa_ctx.per_ctx.valid) { + if (!intel_gvt_ggtt_validate_range(vgpu, + workload->wa_ctx.per_ctx.guest_gma, + CACHELINE_BYTES)) { + kmem_cache_free(s->workloads, workload); + gvt_vgpu_err("invalid per_ctx at: 0x%lx\n", + workload->wa_ctx.per_ctx.guest_gma); + return ERR_PTR(-EINVAL); + } + } } gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x\n", From 7366aeb77cd840f3edea02c65065d40affaa7f45 Mon Sep 17 00:00:00 2001 From: Xiaolin Zhang Date: Thu, 18 Jul 2019 01:10:24 +0800 Subject: [PATCH 133/277] drm/i915/gvt: fix incorrect cache entry for guest page mapping GPU hang observed during the guest OCL conformance test which is caused by THP GTT feature used durning the test. It was observed the same GFN with different size (4K and 2M) requested from the guest in GVT. So during the guest page dma map stage, it is required to unmap first with orginal size and then remap again with requested size. Fixes: b901b252b6cf ("drm/i915/gvt: Add 2M huge gtt support") Cc: stable@vger.kernel.org Reviewed-by: Zhenyu Wang Signed-off-by: Xiaolin Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/kvmgt.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 144301b778df..23aa3e50cbf8 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1904,6 +1904,18 @@ static int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, entry = __gvt_cache_find_gfn(info->vgpu, gfn); if (!entry) { + ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size); + if (ret) + goto err_unlock; + + ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr, size); + if (ret) + goto err_unmap; + } else if (entry->size != size) { + /* the same gfn with different size: unmap and re-map */ + gvt_dma_unmap_page(vgpu, gfn, entry->dma_addr, entry->size); + __gvt_cache_remove_entry(vgpu, entry); + ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size); if (ret) goto err_unlock; From ef5b0b444e6297d03ac0bdc0c82f65396ef4dccd Mon Sep 17 00:00:00 2001 From: Xiaolin Zhang Date: Thu, 20 Jun 2019 10:29:24 -0400 Subject: [PATCH 134/277] drm/i915/gvt: grab runtime pm first for forcewake use in workload_thread, it should grab runtime pm wakelock and later uncore forcewake get will check rpm wakelock held successfully. otherwise, sometimes, rpm wakelock not hold and print call trace below: Call Trace: intel_uncore_forcewake_get+0x15/0x20 [i915] workload_thread+0x5f9/0x16f0 [i915] ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? __switch_to_asm+0x34/0x70 ? __switch_to+0x85/0x3f0 ? __switch_to_asm+0x40/0x70 ? do_wait_intr_irq+0x90/0x90 kthread+0x121/0x140 ? intel_vgpu_clean_workloads+0x100/0x100 [i915] ? kthread_park+0x90/0x90 ret_from_fork+0x35/0x40 --[ end trace 86525f742a02e12c ]-- v2: adapted to use rpm structure. Fixes: 251d46b0875c ("drm/i915/gvt: Pin the per-engine GVT shadow contexts") Reviewed-by: Zhenyu Wang Signed-off-by: Xiaolin Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 6469366c1753..196b4155a309 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -990,6 +990,7 @@ static int workload_thread(void *priv) int ret; bool need_force_wake = (INTEL_GEN(gvt->dev_priv) >= 9); DEFINE_WAIT_FUNC(wait, woken_wake_function); + struct intel_runtime_pm *rpm = &gvt->dev_priv->runtime_pm; kfree(p); @@ -1013,6 +1014,8 @@ static int workload_thread(void *priv) workload->ring_id, workload, workload->vgpu->id); + intel_runtime_pm_get(rpm); + gvt_dbg_sched("ring id %d will dispatch workload %p\n", workload->ring_id, workload); @@ -1042,6 +1045,7 @@ complete: intel_uncore_forcewake_put(&gvt->dev_priv->uncore, FORCEWAKE_ALL); + intel_runtime_pm_put_unchecked(rpm); if (ret && (vgpu_is_vm_unhealthy(ret))) enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR); } From 4187414808095f645ca0661f8dde77617e2e7cb3 Mon Sep 17 00:00:00 2001 From: Colin Xu Date: Thu, 4 Jul 2019 16:45:06 +0800 Subject: [PATCH 135/277] drm/i915/gvt: Adding ppgtt to GVT GEM context after shadow pdps settled. Windows guest can't run after force-TDR with host log: ... gvt: vgpu 1: workload shadow ppgtt isn't ready gvt: vgpu 1: fail to dispatch workload, skip ... The error is raised by set_context_ppgtt_from_shadow(), when it checks and found the shadow_mm isn't marked as shadowed. In work thread before each submission, a shadow_mm is set to shadowed in: shadow_ppgtt_mm() <-intel_vgpu_pin_mm() <-prepare_workload() <-dispatch_workload() <-workload_thread() However checking whether or not shadow_mm is shadowed is prior to it: set_context_ppgtt_from_shadow() <-dispatch_workload() <-workload_thread() In normal case, create workload will check the existence of shadow_mm, if not it will create a new one and marked as shadowed. If already exist it will reuse the old one. Since shadow_mm is reused, checking of shadowed in set_context_ppgtt_from_shadow() actually always see the state set in creation, but not the state set in intel_vgpu_pin_mm(). When force-TDR, all engines are reset, since it's not dmlr level, all ppgtt_mm are invalidated but not destroyed. Invalidation will mark all reused shadow_mm as not shadowed but still keeps in ppgtt_mm_list_head. If workload submission phase those shadow_mm are reused with shadowed not set, then set_context_ppgtt_from_shadow() will report error. Pin for context after shadow_mm pinned and shadow pdps settled. v2: Move set_context_ppgtt_from_shadow() after prepare_workload(). (zhenyu) v3: Move set_context_ppgtt_from_shadow() after shadow pdps updated.(zhenyu) Fixes: 4f15665ccbba ("drm/i915: Add ppgtt to GVT GEM context") Cc: stable@vger.kernel.org Signed-off-by: Colin Xu Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 196b4155a309..9f3fd7d96a69 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -364,16 +364,13 @@ static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) wa_ctx->indirect_ctx.shadow_va = NULL; } -static int set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, - struct i915_gem_context *ctx) +static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, + struct i915_gem_context *ctx) { struct intel_vgpu_mm *mm = workload->shadow_mm; struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ctx->vm); int i = 0; - if (mm->type != INTEL_GVT_MM_PPGTT || !mm->ppgtt_mm.shadowed) - return -EINVAL; - if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { px_dma(ppgtt->pd) = mm->ppgtt_mm.shadow_pdps[0]; } else { @@ -384,8 +381,6 @@ static int set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i]; } } - - return 0; } static int @@ -614,6 +609,8 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) static int prepare_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; + struct intel_vgpu_submission *s = &vgpu->submission; + int ring = workload->ring_id; int ret = 0; ret = intel_vgpu_pin_mm(workload->shadow_mm); @@ -622,8 +619,16 @@ static int prepare_workload(struct intel_vgpu_workload *workload) return ret; } + if (workload->shadow_mm->type != INTEL_GVT_MM_PPGTT || + !workload->shadow_mm->ppgtt_mm.shadowed) { + gvt_vgpu_err("workload shadow ppgtt isn't ready\n"); + return -EINVAL; + } + update_shadow_pdps(workload); + set_context_ppgtt_from_shadow(workload, s->shadow[ring]->gem_context); + ret = intel_vgpu_sync_oos_pages(workload->vgpu); if (ret) { gvt_vgpu_err("fail to vgpu sync oos pages\n"); @@ -674,7 +679,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - struct intel_vgpu_submission *s = &vgpu->submission; struct i915_request *rq; int ring_id = workload->ring_id; int ret; @@ -685,13 +689,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) mutex_lock(&vgpu->vgpu_lock); mutex_lock(&dev_priv->drm.struct_mutex); - ret = set_context_ppgtt_from_shadow(workload, - s->shadow[ring_id]->gem_context); - if (ret < 0) { - gvt_vgpu_err("workload shadow ppgtt isn't ready\n"); - goto err_req; - } - ret = intel_gvt_workload_req_alloc(workload); if (ret) goto err_req; From 5d78e1c2b7f4be00bbe62141603a631dc7812f35 Mon Sep 17 00:00:00 2001 From: Hillf Danton Date: Tue, 30 Jul 2019 17:24:36 +0800 Subject: [PATCH 136/277] ALSA: usb-audio: Fix gpf in snd_usb_pipe_sanity_check syzbot found the following crash on: general protection fault: 0000 [#1] SMP KASAN RIP: 0010:snd_usb_pipe_sanity_check+0x80/0x130 sound/usb/helper.c:75 Call Trace: snd_usb_motu_microbookii_communicate.constprop.0+0xa0/0x2fb sound/usb/quirks.c:1007 snd_usb_motu_microbookii_boot_quirk sound/usb/quirks.c:1051 [inline] snd_usb_apply_boot_quirk.cold+0x163/0x370 sound/usb/quirks.c:1280 usb_audio_probe+0x2ec/0x2010 sound/usb/card.c:576 usb_probe_interface+0x305/0x7a0 drivers/usb/core/driver.c:361 really_probe+0x281/0x650 drivers/base/dd.c:548 .... It was introduced in commit 801ebf1043ae for checking pipe and endpoint types. It is fixed by adding a check of the ep pointer in question. BugLink: https://syzkaller.appspot.com/bug?extid=d59c4387bfb6eced94e2 Reported-by: syzbot Fixes: 801ebf1043ae ("ALSA: usb-audio: Sanity checks for each pipe and EP types") Cc: Andrey Konovalov Signed-off-by: Hillf Danton Signed-off-by: Takashi Iwai --- sound/usb/helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/helper.c b/sound/usb/helper.c index 71d5f540334a..4c12cc5b53fd 100644 --- a/sound/usb/helper.c +++ b/sound/usb/helper.c @@ -72,7 +72,7 @@ int snd_usb_pipe_sanity_check(struct usb_device *dev, unsigned int pipe) struct usb_host_endpoint *ep; ep = usb_pipe_endpoint(dev, pipe); - if (usb_pipetype(pipe) != pipetypes[usb_endpoint_type(&ep->desc)]) + if (!ep || usb_pipetype(pipe) != pipetypes[usb_endpoint_type(&ep->desc)]) return -EINVAL; return 0; } From da1115fdbd6e86c62185cdd2b4bf7add39f2f82b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 29 Jul 2019 15:21:28 +0530 Subject: [PATCH 137/277] powerpc/nvdimm: Pick nearby online node if the device node is not online MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, nvdimm subsystem expects the device numa node for SCM device to be an online node. It also doesn't try to bring the device numa node online. Hence if we use a non-online numa node as device node we hit crashes like below. This is because we try to access uninitialized NODE_DATA in different code paths. cpu 0x0: Vector: 300 (Data Access) at [c0000000fac53170] pc: c0000000004bbc50: ___slab_alloc+0x120/0xca0 lr: c0000000004bc834: __slab_alloc+0x64/0xc0 sp: c0000000fac53400 msr: 8000000002009033 dar: 73e8 dsisr: 80000 current = 0xc0000000fabb6d80 paca = 0xc000000003870000 irqmask: 0x03 irq_happened: 0x01 pid = 7, comm = kworker/u16:0 Linux version 5.2.0-06234-g76bd729b2644 (kvaneesh@ltc-boston123) (gcc version 7.4.0 (Ubuntu 7.4.0-1ubuntu1~18.04.1)) #135 SMP Thu Jul 11 05:36:30 CDT 2019 enter ? for help [link register ] c0000000004bc834 __slab_alloc+0x64/0xc0 [c0000000fac53400] c0000000fac53480 (unreliable) [c0000000fac53500] c0000000004bc818 __slab_alloc+0x48/0xc0 [c0000000fac53560] c0000000004c30a0 __kmalloc_node_track_caller+0x3c0/0x6b0 [c0000000fac535d0] c000000000cfafe4 devm_kmalloc+0x74/0xc0 [c0000000fac53600] c000000000d69434 nd_region_activate+0x144/0x560 [c0000000fac536d0] c000000000d6b19c nd_region_probe+0x17c/0x370 [c0000000fac537b0] c000000000d6349c nvdimm_bus_probe+0x10c/0x230 [c0000000fac53840] c000000000cf3cc4 really_probe+0x254/0x4e0 [c0000000fac538d0] c000000000cf429c driver_probe_device+0x16c/0x1e0 [c0000000fac53950] c000000000cf0b44 bus_for_each_drv+0x94/0x130 [c0000000fac539b0] c000000000cf392c __device_attach+0xdc/0x200 [c0000000fac53a50] c000000000cf231c bus_probe_device+0x4c/0xf0 [c0000000fac53a90] c000000000ced268 device_add+0x528/0x810 [c0000000fac53b60] c000000000d62a58 nd_async_device_register+0x28/0xa0 [c0000000fac53bd0] c0000000001ccb8c async_run_entry_fn+0xcc/0x1f0 [c0000000fac53c50] c0000000001bcd9c process_one_work+0x46c/0x860 [c0000000fac53d20] c0000000001bd4f4 worker_thread+0x364/0x5f0 [c0000000fac53db0] c0000000001c7260 kthread+0x1b0/0x1c0 [c0000000fac53e20] c00000000000b954 ret_from_kernel_thread+0x5c/0x68 The patch tries to fix this by picking the nearest online node as the SCM node. This does have a problem of us losing the information that SCM node is equidistant from two other online nodes. If applications need to understand these fine-grained details we should express then like x86 does via /sys/devices/system/node/nodeX/accessY/initiators/ With the patch we get # numactl -H available: 2 nodes (0-1) node 0 cpus: node 0 size: 0 MB node 0 free: 0 MB node 1 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 node 1 size: 130865 MB node 1 free: 129130 MB node distances: node 0 1 0: 10 20 1: 20 10 # cat /sys/bus/nd/devices/region0/numa_node 0 # dmesg | grep papr_scm [ 91.332305] papr_scm ibm,persistent-memory:ibm,pmemory@44104001: Region registered with target node 2 and online node 0 Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190729095128.23707-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/platforms/pseries/papr_scm.c | 29 +++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 2c07908359b2..a5ac371a3f06 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -275,12 +275,32 @@ static const struct attribute_group *papr_scm_dimm_groups[] = { NULL, }; +static inline int papr_scm_node(int node) +{ + int min_dist = INT_MAX, dist; + int nid, min_node; + + if ((node == NUMA_NO_NODE) || node_online(node)) + return node; + + min_node = first_online_node; + for_each_online_node(nid) { + dist = node_distance(node, nid); + if (dist < min_dist) { + min_dist = dist; + min_node = nid; + } + } + return min_node; +} + static int papr_scm_nvdimm_init(struct papr_scm_priv *p) { struct device *dev = &p->pdev->dev; struct nd_mapping_desc mapping; struct nd_region_desc ndr_desc; unsigned long dimm_flags; + int target_nid, online_nid; p->bus_desc.ndctl = papr_scm_ndctl; p->bus_desc.module = THIS_MODULE; @@ -319,8 +339,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) memset(&ndr_desc, 0, sizeof(ndr_desc)); ndr_desc.attr_groups = region_attr_groups; - ndr_desc.numa_node = dev_to_node(&p->pdev->dev); - ndr_desc.target_node = ndr_desc.numa_node; + target_nid = dev_to_node(&p->pdev->dev); + online_nid = papr_scm_node(target_nid); + ndr_desc.numa_node = online_nid; + ndr_desc.target_node = target_nid; ndr_desc.res = &p->res; ndr_desc.of_node = p->dn; ndr_desc.provider_data = p; @@ -338,6 +360,9 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) ndr_desc.res, p->dn); goto err; } + if (target_nid != online_nid) + dev_info(dev, "Region registered with target node %d and online node %d", + target_nid, online_nid); return 0; From f4cc743a98136df3c3763050a0e8223b52d9a960 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 29 Jul 2019 15:12:16 +0800 Subject: [PATCH 138/277] drm/bridge: lvds-encoder: Fix build error while CONFIG_DRM_KMS_HELPER=m If DRM_LVDS_ENCODER=y but CONFIG_DRM_KMS_HELPER=m, build fails: drivers/gpu/drm/bridge/lvds-encoder.o: In function `lvds_encoder_probe': lvds-encoder.c:(.text+0x155): undefined reference to `devm_drm_panel_bridge_add' Reported-by: Hulk Robot Fixes: dbb58bfd9ae6 ("drm/bridge: Fix lvds-encoder since the panel_bridge rework.") Signed-off-by: YueHaibing Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20190729071216.27488-1-yuehaibing@huawei.com --- drivers/gpu/drm/bridge/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index ee777469293a..cc62603b87c5 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -48,6 +48,7 @@ config DRM_DUMB_VGA_DAC config DRM_LVDS_ENCODER tristate "Transparent parallel to LVDS encoder support" depends on OF + select DRM_KMS_HELPER select DRM_PANEL_BRIDGE help Support for transparent parallel to LVDS encoders that don't require From e1ae72a21e5f0d1846e26e3f5963930664702071 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 29 Jul 2019 17:05:20 +0800 Subject: [PATCH 139/277] drm/bridge: tc358764: Fix build error If CONFIG_DRM_TOSHIBA_TC358764=y but CONFIG_DRM_KMS_HELPER=m, building fails: drivers/gpu/drm/bridge/tc358764.o:(.rodata+0x228): undefined reference to `drm_atomic_helper_connector_reset' drivers/gpu/drm/bridge/tc358764.o:(.rodata+0x240): undefined reference to `drm_helper_probe_single_connector_modes' drivers/gpu/drm/bridge/tc358764.o:(.rodata+0x268): undefined reference to `drm_atomic_helper_connector_duplicate_state' drivers/gpu/drm/bridge/tc358764.o:(.rodata+0x270): undefined reference to `drm_atomic_helper_connector_destroy_state' Like TC358767, select DRM_KMS_HELPER to fix this, and change to select DRM_PANEL to avoid recursive dependency. Reported-by: Hulk Robot Fixes: f38b7cca6d0e ("drm/bridge: tc358764: Add DSI to LVDS bridge driver") Signed-off-by: YueHaibing Reviewed-by: Laurent Pinchart Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20190729090520.25968-1-yuehaibing@huawei.com --- drivers/gpu/drm/bridge/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index cc62603b87c5..e4e22bbae2a7 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -117,9 +117,10 @@ config DRM_THINE_THC63LVD1024 config DRM_TOSHIBA_TC358764 tristate "TC358764 DSI/LVDS bridge" - depends on DRM && DRM_PANEL depends on OF select DRM_MIPI_DSI + select DRM_KMS_HELPER + select DRM_PANEL help Toshiba TC358764 DSI/LVDS bridge driver. From 7db57e77586744af46c8bbf8f831bb2b941b7afc Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 31 Jul 2019 00:00:15 +1000 Subject: [PATCH 140/277] powerpc/spe: Mark expected switch fall-throughs Mark switch cases where we are expecting to fall through. Fixes errors such as below, seen with mpc85xx_defconfig: arch/powerpc/kernel/align.c: In function 'emulate_spe': arch/powerpc/kernel/align.c:178:8: error: this statement may fall through ret |= __get_user_inatomic(temp.v[3], p++); ^~ Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190730141917.21817-1-mpe@ellerman.id.au --- arch/powerpc/kernel/align.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 7107ad86de65..92045ed64976 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -176,9 +176,11 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, ret |= __get_user_inatomic(temp.v[1], p++); ret |= __get_user_inatomic(temp.v[2], p++); ret |= __get_user_inatomic(temp.v[3], p++); + /* fall through */ case 4: ret |= __get_user_inatomic(temp.v[4], p++); ret |= __get_user_inatomic(temp.v[5], p++); + /* fall through */ case 2: ret |= __get_user_inatomic(temp.v[6], p++); ret |= __get_user_inatomic(temp.v[7], p++); @@ -259,9 +261,11 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, ret |= __put_user_inatomic(data.v[1], p++); ret |= __put_user_inatomic(data.v[2], p++); ret |= __put_user_inatomic(data.v[3], p++); + /* fall through */ case 4: ret |= __put_user_inatomic(data.v[4], p++); ret |= __put_user_inatomic(data.v[5], p++); + /* fall through */ case 2: ret |= __put_user_inatomic(data.v[6], p++); ret |= __put_user_inatomic(data.v[7], p++); From 5241ab4cf42d3a93b933b55d3d53f43049081fa1 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 29 Jul 2019 18:15:17 +0900 Subject: [PATCH 141/277] kbuild: initialize CLANG_FLAGS correctly in the top Makefile CLANG_FLAGS is initialized by the following line: CLANG_FLAGS := --target=$(notdir $(CROSS_COMPILE:%-=%)) ..., which is run only when CROSS_COMPILE is set. Some build targets (bindeb-pkg etc.) recurse to the top Makefile. When you build the kernel with Clang but without CROSS_COMPILE, the same compiler flags such as -no-integrated-as are accumulated into CLANG_FLAGS. If you run 'make CC=clang' and then 'make CC=clang bindeb-pkg', Kbuild will recompile everything needlessly due to the build command change. Fix this by correctly initializing CLANG_FLAGS. Fixes: 238bcbc4e07f ("kbuild: consolidate Clang compiler flags") Cc: # v5.0+ Signed-off-by: Masahiro Yamada Reviewed-by: Nathan Chancellor Acked-by: Nick Desaulniers --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index fa0fbe7851ea..5ee6f6889869 100644 --- a/Makefile +++ b/Makefile @@ -472,6 +472,7 @@ KBUILD_CFLAGS_MODULE := -DMODULE KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds KBUILD_LDFLAGS := GCC_PLUGINS_CFLAGS := +CLANG_FLAGS := export ARCH SRCARCH CONFIG_SHELL HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE AS LD CC export CPP AR NM STRIP OBJCOPY OBJDUMP PAHOLE KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS @@ -519,7 +520,7 @@ endif ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),) ifneq ($(CROSS_COMPILE),) -CLANG_FLAGS := --target=$(notdir $(CROSS_COMPILE:%-=%)) +CLANG_FLAGS += --target=$(notdir $(CROSS_COMPILE:%-=%)) GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)elfedit)) CLANG_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR) GCC_TOOLCHAIN := $(realpath $(GCC_TOOLCHAIN_DIR)/..) From b4f9a1a87a48c255bb90d8a6c3d555a1abb88130 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 17 Jul 2019 13:23:39 +0100 Subject: [PATCH 142/277] Btrfs: fix incremental send failure after deduplication When doing an incremental send operation we can fail if we previously did deduplication operations against a file that exists in both snapshots. In that case we will fail the send operation with -EIO and print a message to dmesg/syslog like the following: BTRFS error (device sdc): Send: inconsistent snapshot, found updated \ extent for inode 257 without updated inode item, send root is 258, \ parent root is 257 This requires that we deduplicate to the same file in both snapshots for the same amount of times on each snapshot. The issue happens because a deduplication only updates the iversion of an inode and does not update any other field of the inode, therefore if we deduplicate the file on each snapshot for the same amount of time, the inode will have the same iversion value (stored as the "sequence" field on the inode item) on both snapshots, therefore it will be seen as unchanged between in the send snapshot while there are new/updated/deleted extent items when comparing to the parent snapshot. This makes the send operation return -EIO and print an error message. Example reproducer: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt # Create our first file. The first half of the file has several 64Kb # extents while the second half as a single 512Kb extent. $ xfs_io -f -s -c "pwrite -S 0xb8 -b 64K 0 512K" /mnt/foo $ xfs_io -c "pwrite -S 0xb8 512K 512K" /mnt/foo # Create the base snapshot and the parent send stream from it. $ btrfs subvolume snapshot -r /mnt /mnt/mysnap1 $ btrfs send -f /tmp/1.snap /mnt/mysnap1 # Create our second file, that has exactly the same data as the first # file. $ xfs_io -f -c "pwrite -S 0xb8 0 1M" /mnt/bar # Create the second snapshot, used for the incremental send, before # doing the file deduplication. $ btrfs subvolume snapshot -r /mnt /mnt/mysnap2 # Now before creating the incremental send stream: # # 1) Deduplicate into a subrange of file foo in snapshot mysnap1. This # will drop several extent items and add a new one, also updating # the inode's iversion (sequence field in inode item) by 1, but not # any other field of the inode; # # 2) Deduplicate into a different subrange of file foo in snapshot # mysnap2. This will replace an extent item with a new one, also # updating the inode's iversion by 1 but not any other field of the # inode. # # After these two deduplication operations, the inode items, for file # foo, are identical in both snapshots, but we have different extent # items for this inode in both snapshots. We want to check this doesn't # cause send to fail with an error or produce an incorrect stream. $ xfs_io -r -c "dedupe /mnt/bar 0 0 512K" /mnt/mysnap1/foo $ xfs_io -r -c "dedupe /mnt/bar 512K 512K 512K" /mnt/mysnap2/foo # Create the incremental send stream. $ btrfs send -p /mnt/mysnap1 -f /tmp/2.snap /mnt/mysnap2 ERROR: send ioctl failed with -5: Input/output error This issue started happening back in 2015 when deduplication was updated to not update the inode's ctime and mtime and update only the iversion. Back then we would hit a BUG_ON() in send, but later in 2016 send was updated to return -EIO and print the error message instead of doing the BUG_ON(). A test case for fstests follows soon. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=203933 Fixes: 1c919a5e13702c ("btrfs: don't update mtime/ctime on deduped inodes") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/send.c | 77 ++++++++++--------------------------------------- 1 file changed, 15 insertions(+), 62 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 69b59bf75882..c3c0c064c25d 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -6322,68 +6322,21 @@ static int changed_extent(struct send_ctx *sctx, { int ret = 0; - if (sctx->cur_ino != sctx->cmp_key->objectid) { - - if (result == BTRFS_COMPARE_TREE_CHANGED) { - struct extent_buffer *leaf_l; - struct extent_buffer *leaf_r; - struct btrfs_file_extent_item *ei_l; - struct btrfs_file_extent_item *ei_r; - - leaf_l = sctx->left_path->nodes[0]; - leaf_r = sctx->right_path->nodes[0]; - ei_l = btrfs_item_ptr(leaf_l, - sctx->left_path->slots[0], - struct btrfs_file_extent_item); - ei_r = btrfs_item_ptr(leaf_r, - sctx->right_path->slots[0], - struct btrfs_file_extent_item); - - /* - * We may have found an extent item that has changed - * only its disk_bytenr field and the corresponding - * inode item was not updated. This case happens due to - * very specific timings during relocation when a leaf - * that contains file extent items is COWed while - * relocation is ongoing and its in the stage where it - * updates data pointers. So when this happens we can - * safely ignore it since we know it's the same extent, - * but just at different logical and physical locations - * (when an extent is fully replaced with a new one, we - * know the generation number must have changed too, - * since snapshot creation implies committing the current - * transaction, and the inode item must have been updated - * as well). - * This replacement of the disk_bytenr happens at - * relocation.c:replace_file_extents() through - * relocation.c:btrfs_reloc_cow_block(). - */ - if (btrfs_file_extent_generation(leaf_l, ei_l) == - btrfs_file_extent_generation(leaf_r, ei_r) && - btrfs_file_extent_ram_bytes(leaf_l, ei_l) == - btrfs_file_extent_ram_bytes(leaf_r, ei_r) && - btrfs_file_extent_compression(leaf_l, ei_l) == - btrfs_file_extent_compression(leaf_r, ei_r) && - btrfs_file_extent_encryption(leaf_l, ei_l) == - btrfs_file_extent_encryption(leaf_r, ei_r) && - btrfs_file_extent_other_encoding(leaf_l, ei_l) == - btrfs_file_extent_other_encoding(leaf_r, ei_r) && - btrfs_file_extent_type(leaf_l, ei_l) == - btrfs_file_extent_type(leaf_r, ei_r) && - btrfs_file_extent_disk_bytenr(leaf_l, ei_l) != - btrfs_file_extent_disk_bytenr(leaf_r, ei_r) && - btrfs_file_extent_disk_num_bytes(leaf_l, ei_l) == - btrfs_file_extent_disk_num_bytes(leaf_r, ei_r) && - btrfs_file_extent_offset(leaf_l, ei_l) == - btrfs_file_extent_offset(leaf_r, ei_r) && - btrfs_file_extent_num_bytes(leaf_l, ei_l) == - btrfs_file_extent_num_bytes(leaf_r, ei_r)) - return 0; - } - - inconsistent_snapshot_error(sctx, result, "extent"); - return -EIO; - } + /* + * We have found an extent item that changed without the inode item + * having changed. This can happen either after relocation (where the + * disk_bytenr of an extent item is replaced at + * relocation.c:replace_file_extents()) or after deduplication into a + * file in both the parent and send snapshots (where an extent item can + * get modified or replaced with a new one). Note that deduplication + * updates the inode item, but it only changes the iversion (sequence + * field in the inode item) of the inode, so if a file is deduplicated + * the same amount of times in both the parent and send snapshots, its + * iversion becames the same in both snapshots, whence the inode item is + * the same on both snapshots. + */ + if (sctx->cur_ino != sctx->cmp_key->objectid) + return 0; if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { if (result != BTRFS_COMPARE_TREE_DELETED) From cb2d3daddbfb6318d170e79aac1f7d5e4d49f0d7 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 25 Jul 2019 11:27:04 +0100 Subject: [PATCH 143/277] Btrfs: fix race leading to fs corruption after transaction abort When one transaction is finishing its commit, it is possible for another transaction to start and enter its initial commit phase as well. If the first ends up getting aborted, we have a small time window where the second transaction commit does not notice that the previous transaction aborted and ends up committing, writing a superblock that points to btrees that reference extent buffers (nodes and leafs) that were not persisted to disk. The consequence is that after mounting the filesystem again, we will be unable to load some btree nodes/leafs, either because the content on disk is either garbage (or just zeroes) or corresponds to the old content of a previouly COWed or deleted node/leaf, resulting in the well known error messages "parent transid verify failed on ...". The following sequence diagram illustrates how this can happen. CPU 1 CPU 2 btrfs_commit_transaction() (...) --> sets transaction state to TRANS_STATE_UNBLOCKED --> sets fs_info->running_transaction to NULL (...) btrfs_start_transaction() start_transaction() wait_current_trans() --> returns immediately because fs_info->running_transaction is NULL join_transaction() --> creates transaction N + 1 --> sets fs_info->running_transaction to transaction N + 1 --> adds transaction N + 1 to the fs_info->trans_list list --> returns transaction handle pointing to the new transaction N + 1 (...) btrfs_sync_file() btrfs_start_transaction() --> returns handle to transaction N + 1 (...) btrfs_write_and_wait_transaction() --> writeback of some extent buffer fails, returns an error btrfs_handle_fs_error() --> sets BTRFS_FS_STATE_ERROR in fs_info->fs_state --> jumps to label "scrub_continue" cleanup_transaction() btrfs_abort_transaction(N) --> sets BTRFS_FS_STATE_TRANS_ABORTED flag in fs_info->fs_state --> sets aborted field in the transaction and transaction handle structures, for transaction N only --> removes transaction from the list fs_info->trans_list btrfs_commit_transaction(N + 1) --> transaction N + 1 was not aborted, so it proceeds (...) --> sets the transaction's state to TRANS_STATE_COMMIT_START --> does not find the previous transaction (N) in the fs_info->trans_list, so it doesn't know that transaction was aborted, and the commit of transaction N + 1 proceeds (...) --> sets transaction N + 1 state to TRANS_STATE_UNBLOCKED btrfs_write_and_wait_transaction() --> succeeds writing all extent buffers created in the transaction N + 1 write_all_supers() --> succeeds --> we now have a superblock on disk that points to trees that refer to at least one extent buffer that was never persisted So fix this by updating the transaction commit path to check if the flag BTRFS_FS_STATE_TRANS_ABORTED is set on fs_info->fs_state if after setting the transaction to the TRANS_STATE_COMMIT_START we do not find any previous transaction in the fs_info->trans_list. If the flag is set, just fail the transaction commit with -EROFS, as we do in other places. The exact error code for the previous transaction abort was already logged and reported. Fixes: 49b25e0540904b ("btrfs: enhance transaction abort infrastructure") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/transaction.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 3b8ae1a8f02d..39b7bcde3c6f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -2037,6 +2037,16 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) } } else { spin_unlock(&fs_info->trans_lock); + /* + * The previous transaction was aborted and was already removed + * from the list of transactions at fs_info->trans_list. So we + * abort to prevent writing a new superblock that reflects a + * corrupt state (pointing to trees with unwritten nodes/leafs). + */ + if (test_bit(BTRFS_FS_STATE_TRANS_ABORTED, &fs_info->fs_state)) { + ret = -EROFS; + goto cleanup_transaction; + } } extwriter_counter_dec(cur_trans, trans->type); From a6d155d2e363f26290ffd50591169cb96c2a609e Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 29 Jul 2019 09:37:10 +0100 Subject: [PATCH 144/277] Btrfs: fix deadlock between fiemap and transaction commits The fiemap handler locks a file range that can have unflushed delalloc, and after locking the range, it tries to attach to a running transaction. If the running transaction started its commit, that is, it is in state TRANS_STATE_COMMIT_START, and either the filesystem was mounted with the flushoncommit option or the transaction is creating a snapshot for the subvolume that contains the file that fiemap is operating on, we end up deadlocking. This happens because fiemap is blocked on the transaction, waiting for it to complete, and the transaction is waiting for the flushed dealloc to complete, which requires locking the file range that the fiemap task already locked. The following stack traces serve as an example of when this deadlock happens: (...) [404571.515510] Workqueue: btrfs-endio-write btrfs_endio_write_helper [btrfs] [404571.515956] Call Trace: [404571.516360] ? __schedule+0x3ae/0x7b0 [404571.516730] schedule+0x3a/0xb0 [404571.517104] lock_extent_bits+0x1ec/0x2a0 [btrfs] [404571.517465] ? remove_wait_queue+0x60/0x60 [404571.517832] btrfs_finish_ordered_io+0x292/0x800 [btrfs] [404571.518202] normal_work_helper+0xea/0x530 [btrfs] [404571.518566] process_one_work+0x21e/0x5c0 [404571.518990] worker_thread+0x4f/0x3b0 [404571.519413] ? process_one_work+0x5c0/0x5c0 [404571.519829] kthread+0x103/0x140 [404571.520191] ? kthread_create_worker_on_cpu+0x70/0x70 [404571.520565] ret_from_fork+0x3a/0x50 [404571.520915] kworker/u8:6 D 0 31651 2 0x80004000 [404571.521290] Workqueue: btrfs-flush_delalloc btrfs_flush_delalloc_helper [btrfs] (...) [404571.537000] fsstress D 0 13117 13115 0x00004000 [404571.537263] Call Trace: [404571.537524] ? __schedule+0x3ae/0x7b0 [404571.537788] schedule+0x3a/0xb0 [404571.538066] wait_current_trans+0xc8/0x100 [btrfs] [404571.538349] ? remove_wait_queue+0x60/0x60 [404571.538680] start_transaction+0x33c/0x500 [btrfs] [404571.539076] btrfs_check_shared+0xa3/0x1f0 [btrfs] [404571.539513] ? extent_fiemap+0x2ce/0x650 [btrfs] [404571.539866] extent_fiemap+0x2ce/0x650 [btrfs] [404571.540170] do_vfs_ioctl+0x526/0x6f0 [404571.540436] ksys_ioctl+0x70/0x80 [404571.540734] __x64_sys_ioctl+0x16/0x20 [404571.540997] do_syscall_64+0x60/0x1d0 [404571.541279] entry_SYSCALL_64_after_hwframe+0x49/0xbe (...) [404571.543729] btrfs D 0 14210 14208 0x00004000 [404571.544023] Call Trace: [404571.544275] ? __schedule+0x3ae/0x7b0 [404571.544526] ? wait_for_completion+0x112/0x1a0 [404571.544795] schedule+0x3a/0xb0 [404571.545064] schedule_timeout+0x1ff/0x390 [404571.545351] ? lock_acquire+0xa6/0x190 [404571.545638] ? wait_for_completion+0x49/0x1a0 [404571.545890] ? wait_for_completion+0x112/0x1a0 [404571.546228] wait_for_completion+0x131/0x1a0 [404571.546503] ? wake_up_q+0x70/0x70 [404571.546775] btrfs_wait_ordered_extents+0x27c/0x400 [btrfs] [404571.547159] btrfs_commit_transaction+0x3b0/0xae0 [btrfs] [404571.547449] ? btrfs_mksubvol+0x4a4/0x640 [btrfs] [404571.547703] ? remove_wait_queue+0x60/0x60 [404571.547969] btrfs_mksubvol+0x605/0x640 [btrfs] [404571.548226] ? __sb_start_write+0xd4/0x1c0 [404571.548512] ? mnt_want_write_file+0x24/0x50 [404571.548789] btrfs_ioctl_snap_create_transid+0x169/0x1a0 [btrfs] [404571.549048] btrfs_ioctl_snap_create_v2+0x11d/0x170 [btrfs] [404571.549307] btrfs_ioctl+0x133f/0x3150 [btrfs] [404571.549549] ? mem_cgroup_charge_statistics+0x4c/0xd0 [404571.549792] ? mem_cgroup_commit_charge+0x84/0x4b0 [404571.550064] ? __handle_mm_fault+0xe3e/0x11f0 [404571.550306] ? do_raw_spin_unlock+0x49/0xc0 [404571.550608] ? _raw_spin_unlock+0x24/0x30 [404571.550976] ? __handle_mm_fault+0xedf/0x11f0 [404571.551319] ? do_vfs_ioctl+0xa2/0x6f0 [404571.551659] ? btrfs_ioctl_get_supported_features+0x30/0x30 [btrfs] [404571.552087] do_vfs_ioctl+0xa2/0x6f0 [404571.552355] ksys_ioctl+0x70/0x80 [404571.552621] __x64_sys_ioctl+0x16/0x20 [404571.552864] do_syscall_64+0x60/0x1d0 [404571.553104] entry_SYSCALL_64_after_hwframe+0x49/0xbe (...) If we were joining the transaction instead of attaching to it, we would not risk a deadlock because a join only blocks if the transaction is in a state greater then or equals to TRANS_STATE_COMMIT_DOING, and the delalloc flush performed by a transaction is done before it reaches that state, when it is in the state TRANS_STATE_COMMIT_START. However a transaction join is intended for use cases where we do modify the filesystem, and fiemap only needs to peek at delayed references from the current transaction in order to determine if extents are shared, and, besides that, when there is no current transaction or when it blocks to wait for a current committing transaction to complete, it creates a new transaction without reserving any space. Such unnecessary transactions, besides doing unnecessary IO, can cause transaction aborts (-ENOSPC) and unnecessary rotation of the precious backup roots. So fix this by adding a new transaction join variant, named join_nostart, which behaves like the regular join, but it does not create a transaction when none currently exists or after waiting for a committing transaction to complete. Fixes: 03628cdbc64db6 ("Btrfs: do not start a transaction during fiemap") Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 2 +- fs/btrfs/transaction.c | 22 ++++++++++++++++++---- fs/btrfs/transaction.h | 3 +++ 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 89116afda7a2..e5d85311d5d5 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1483,7 +1483,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr, ulist_init(roots); ulist_init(tmp); - trans = btrfs_attach_transaction(root); + trans = btrfs_join_transaction_nostart(root); if (IS_ERR(trans)) { if (PTR_ERR(trans) != -ENOENT && PTR_ERR(trans) != -EROFS) { ret = PTR_ERR(trans); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 39b7bcde3c6f..e3adb714c04b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -28,15 +28,18 @@ static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = { [TRANS_STATE_COMMIT_START] = (__TRANS_START | __TRANS_ATTACH), [TRANS_STATE_COMMIT_DOING] = (__TRANS_START | __TRANS_ATTACH | - __TRANS_JOIN), + __TRANS_JOIN | + __TRANS_JOIN_NOSTART), [TRANS_STATE_UNBLOCKED] = (__TRANS_START | __TRANS_ATTACH | __TRANS_JOIN | - __TRANS_JOIN_NOLOCK), + __TRANS_JOIN_NOLOCK | + __TRANS_JOIN_NOSTART), [TRANS_STATE_COMPLETED] = (__TRANS_START | __TRANS_ATTACH | __TRANS_JOIN | - __TRANS_JOIN_NOLOCK), + __TRANS_JOIN_NOLOCK | + __TRANS_JOIN_NOSTART), }; void btrfs_put_transaction(struct btrfs_transaction *transaction) @@ -543,7 +546,8 @@ again: ret = join_transaction(fs_info, type); if (ret == -EBUSY) { wait_current_trans(fs_info); - if (unlikely(type == TRANS_ATTACH)) + if (unlikely(type == TRANS_ATTACH || + type == TRANS_JOIN_NOSTART)) ret = -ENOENT; } } while (ret == -EBUSY); @@ -659,6 +663,16 @@ struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root BTRFS_RESERVE_NO_FLUSH, true); } +/* + * Similar to regular join but it never starts a transaction when none is + * running or after waiting for the current one to finish. + */ +struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root) +{ + return start_transaction(root, 0, TRANS_JOIN_NOSTART, + BTRFS_RESERVE_NO_FLUSH, true); +} + /* * btrfs_attach_transaction() - catch the running transaction * diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 527ea94b57d9..2c5a6f6e5bb0 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -94,11 +94,13 @@ struct btrfs_transaction { #define __TRANS_JOIN (1U << 11) #define __TRANS_JOIN_NOLOCK (1U << 12) #define __TRANS_DUMMY (1U << 13) +#define __TRANS_JOIN_NOSTART (1U << 14) #define TRANS_START (__TRANS_START | __TRANS_FREEZABLE) #define TRANS_ATTACH (__TRANS_ATTACH) #define TRANS_JOIN (__TRANS_JOIN | __TRANS_FREEZABLE) #define TRANS_JOIN_NOLOCK (__TRANS_JOIN_NOLOCK) +#define TRANS_JOIN_NOSTART (__TRANS_JOIN_NOSTART) #define TRANS_EXTWRITERS (__TRANS_START | __TRANS_ATTACH) @@ -183,6 +185,7 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( int min_factor); struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); +struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root); struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root); struct btrfs_trans_handle *btrfs_attach_transaction_barrier( struct btrfs_root *root); From eeebce1862970653cdf5c01e98bc669edd8f529a Mon Sep 17 00:00:00 2001 From: Don Brace Date: Wed, 24 Jul 2019 17:08:06 -0500 Subject: [PATCH 145/277] scsi: hpsa: correct scsi command status issue after reset Reviewed-by: Bader Ali - Saleh Reviewed-by: Scott Teel Reviewed-by: Scott Benesh Reviewed-by: Kevin Barnett Signed-off-by: Don Brace Signed-off-by: Martin K. Petersen --- drivers/scsi/hpsa.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 43a6b5350775..89e71ebc5964 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -2334,6 +2334,8 @@ static int handle_ioaccel_mode2_error(struct ctlr_info *h, case IOACCEL2_SERV_RESPONSE_COMPLETE: switch (c2->error_data.status) { case IOACCEL2_STATUS_SR_TASK_COMP_GOOD: + if (cmd) + cmd->result = 0; break; case IOACCEL2_STATUS_SR_TASK_COMP_CHK_COND: cmd->result |= SAM_STAT_CHECK_CONDITION; @@ -2483,8 +2485,10 @@ static void process_ioaccel2_completion(struct ctlr_info *h, /* check for good status */ if (likely(c2->error_data.serv_response == 0 && - c2->error_data.status == 0)) + c2->error_data.status == 0)) { + cmd->result = 0; return hpsa_cmd_free_and_done(h, c, cmd); + } /* * Any RAID offload error results in retry which will use @@ -5653,6 +5657,12 @@ static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd) if (c == NULL) return SCSI_MLQUEUE_DEVICE_BUSY; + /* + * This is necessary because the SML doesn't zero out this field during + * error recovery. + */ + cmd->result = 0; + /* * Call alternate submit routine for I/O accelerated commands. * Retries always go down the normal I/O path. From 18a56d699d174f3ac41f2ea86e1ca21f98b01d8f Mon Sep 17 00:00:00 2001 From: Don Brace Date: Wed, 24 Jul 2019 17:08:12 -0500 Subject: [PATCH 146/277] scsi: hpsa: remove printing internal cdb on tag collision Remove racy printing of internal commands. Completion thread can be cleaning up the command in parallel. Reviewed-by: Bader Ali - Saleh Reviewed-by: Scott Teel Reviewed-by: Scott Benesh Reviewed-by: Kevin Barnett Signed-off-by: Don Brace Signed-off-by: Martin K. Petersen --- drivers/scsi/hpsa.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 89e71ebc5964..bba099e53266 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -6091,8 +6091,6 @@ static struct CommandList *cmd_tagged_alloc(struct ctlr_info *h, if (idx != h->last_collision_tag) { /* Print once per tag */ dev_warn(&h->pdev->dev, "%s: tag collision (tag=%d)\n", __func__, idx); - if (c->scsi_cmd != NULL) - scsi_print_command(c->scsi_cmd); if (scmd) scsi_print_command(scmd); h->last_collision_tag = idx; From df9a606184bfdb5ae3ca9d226184e9489f5c24f7 Mon Sep 17 00:00:00 2001 From: Suganath Prabu Date: Tue, 30 Jul 2019 03:43:57 -0400 Subject: [PATCH 147/277] scsi: mpt3sas: Use 63-bit DMA addressing on SAS35 HBA Although SAS3 & SAS3.5 IT HBA controllers support 64-bit DMA addressing, as per hardware design, if DMA-able range contains all 64-bits set (0xFFFFFFFF-FFFFFFFF) then it results in a firmware fault. E.g. SGE's start address is 0xFFFFFFFF-FFFF000 and data length is 0x1000 bytes. when HBA tries to DMA the data at 0xFFFFFFFF-FFFFFFFF location then HBA will fault the firmware. Driver will set 63-bit DMA mask to ensure the above address will not be used. Cc: # 5.1.20+ Signed-off-by: Suganath Prabu Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 684662888792..050c0f029ef9 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -2703,6 +2703,8 @@ _base_config_dma_addressing(struct MPT3SAS_ADAPTER *ioc, struct pci_dev *pdev) { u64 required_mask, coherent_mask; struct sysinfo s; + /* Set 63 bit DMA mask for all SAS3 and SAS35 controllers */ + int dma_mask = (ioc->hba_mpi_version_belonged > MPI2_VERSION) ? 63 : 64; if (ioc->is_mcpu_endpoint) goto try_32bit; @@ -2712,17 +2714,17 @@ _base_config_dma_addressing(struct MPT3SAS_ADAPTER *ioc, struct pci_dev *pdev) goto try_32bit; if (ioc->dma_mask) - coherent_mask = DMA_BIT_MASK(64); + coherent_mask = DMA_BIT_MASK(dma_mask); else coherent_mask = DMA_BIT_MASK(32); - if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) || + if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(dma_mask)) || dma_set_coherent_mask(&pdev->dev, coherent_mask)) goto try_32bit; ioc->base_add_sg_single = &_base_add_sg_single_64; ioc->sge_size = sizeof(Mpi2SGESimple64_t); - ioc->dma_mask = 64; + ioc->dma_mask = dma_mask; goto out; try_32bit: @@ -2744,7 +2746,7 @@ static int _base_change_consistent_dma_mask(struct MPT3SAS_ADAPTER *ioc, struct pci_dev *pdev) { - if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) { + if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(ioc->dma_mask))) { if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) return -ENODEV; } @@ -4989,7 +4991,7 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc) total_sz += sz; } while (ioc->rdpq_array_enable && (++i < ioc->reply_queue_count)); - if (ioc->dma_mask == 64) { + if (ioc->dma_mask > 32) { if (_base_change_consistent_dma_mask(ioc, ioc->pdev) != 0) { ioc_warn(ioc, "no suitable consistent DMA mask for %s\n", pci_name(ioc->pdev)); From e82f04ec6ba91065fd33a6201ffd7cab840e1475 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Mon, 29 Jul 2019 16:44:51 +0800 Subject: [PATCH 148/277] scsi: qla2xxx: Fix possible fcport null-pointer dereferences In qla2x00_alloc_fcport(), fcport is assigned to NULL in the error handling code on line 4880: fcport = NULL; Then fcport is used on lines 4883-4886: INIT_WORK(&fcport->del_work, qla24xx_delete_sess_fn); INIT_WORK(&fcport->reg_work, qla_register_fcport_fn); INIT_LIST_HEAD(&fcport->gnl_entry); INIT_LIST_HEAD(&fcport->list); Thus, possible null-pointer dereferences may occur. To fix these bugs, qla2x00_alloc_fcport() directly returns NULL in the error handling code. These bugs are found by a static analysis tool STCheck written by us. Signed-off-by: Jia-Ju Bai Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 4059655639d9..da83034d4759 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -4877,7 +4877,7 @@ qla2x00_alloc_fcport(scsi_qla_host_t *vha, gfp_t flags) ql_log(ql_log_warn, vha, 0xd049, "Failed to allocate ct_sns request.\n"); kfree(fcport); - fcport = NULL; + return NULL; } INIT_WORK(&fcport->del_work, qla24xx_delete_sess_fn); From 30b692d3b390c6fe78a5064be0c4bbd44a41be59 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 29 Jul 2019 17:48:24 +0200 Subject: [PATCH 149/277] exit: make setting exit_state consistent Since commit b191d6491be6 ("pidfd: fix a poll race when setting exit_state") we unconditionally set exit_state to EXIT_ZOMBIE before calling into do_notify_parent(). This was done to eliminate a race when querying exit_state in do_notify_pidfd(). Back then we decided to do the absolute minimal thing to fix this and not touch the rest of the exit_notify() function where exit_state is set. Since this fix has not caused any issues change the setting of exit_state to EXIT_DEAD in the autoreap case to account for the fact hat exit_state is set to EXIT_ZOMBIE unconditionally. This fix was planned but also explicitly requested in [1] and makes the whole code more consistent. /* References */ [1]: https://lore.kernel.org/lkml/CAHk-=wigcxGFR2szue4wavJtH5cYTTeNES=toUBVGsmX0rzX+g@mail.gmail.com Signed-off-by: Christian Brauner Acked-by: Oleg Nesterov Cc: Linus Torvalds --- kernel/exit.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index 4436158a6d30..5b4a5dcce8f8 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -734,9 +734,10 @@ static void exit_notify(struct task_struct *tsk, int group_dead) autoreap = true; } - tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE; - if (tsk->exit_state == EXIT_DEAD) + if (autoreap) { + tsk->exit_state = EXIT_DEAD; list_add(&tsk->ptrace_entry, &dead); + } /* mt-exec, de_thread() is waiting for group leader */ if (unlikely(tsk->signal->notify_count < 0)) From afa1d96d1430c2138c545fb76e6dcb21222098d4 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Tue, 30 Jul 2019 11:28:20 -0700 Subject: [PATCH 150/277] xfs: Fix possible null-pointer dereferences in xchk_da_btree_block_check_sibling() In xchk_da_btree_block_check_sibling(), there is an if statement on line 274 to check whether ds->state->altpath.blk[level].bp is NULL: if (ds->state->altpath.blk[level].bp) When ds->state->altpath.blk[level].bp is NULL, it is used on line 281: xfs_trans_brelse(..., ds->state->altpath.blk[level].bp); struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(bp->b_transp == tp); Thus, possible null-pointer dereferences may occur. To fix these bugs, ds->state->altpath.blk[level].bp is checked before being used. These bugs are found by a static analysis tool STCheck written by us. Signed-off-by: Jia-Ju Bai Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/scrub/dabtree.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index 94c4f1de1922..77ff9f97bcda 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -278,7 +278,11 @@ xchk_da_btree_block_check_sibling( /* Compare upper level pointer to sibling pointer. */ if (ds->state->altpath.blk[level].blkno != sibling) xchk_da_set_corrupt(ds, level); - xfs_trans_brelse(ds->dargs.trans, ds->state->altpath.blk[level].bp); + if (ds->state->altpath.blk[level].bp) { + xfs_trans_brelse(ds->dargs.trans, + ds->state->altpath.blk[level].bp); + ds->state->altpath.blk[level].bp = NULL; + } out: return error; } From 89e524c04fa966330e2e80ab2bc50b9944c5847a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 30 Jul 2019 13:10:14 +0200 Subject: [PATCH 151/277] loop: Fix mount(2) failure due to race with LOOP_SET_FD Commit 33ec3e53e7b1 ("loop: Don't change loop device under exclusive opener") made LOOP_SET_FD ioctl acquire exclusive block device reference while it updates loop device binding. However this can make perfectly valid mount(2) fail with EBUSY due to racing LOOP_SET_FD holding temporarily the exclusive bdev reference in cases like this: for i in {a..z}{a..z}; do dd if=/dev/zero of=$i.image bs=1k count=0 seek=1024 mkfs.ext2 $i.image mkdir mnt$i done echo "Run" for i in {a..z}{a..z}; do mount -o loop -t ext2 $i.image mnt$i & done Fix the problem by not getting full exclusive bdev reference in LOOP_SET_FD but instead just mark the bdev as being claimed while we update the binding information. This just blocks new exclusive openers instead of failing them with EBUSY thus fixing the problem. Fixes: 33ec3e53e7b1 ("loop: Don't change loop device under exclusive opener") Cc: stable@vger.kernel.org Tested-by: Kai-Heng Feng Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- drivers/block/loop.c | 16 +++++---- fs/block_dev.c | 83 +++++++++++++++++++++++++++++++------------- include/linux/fs.h | 6 ++++ 3 files changed, 73 insertions(+), 32 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 44c9985f352a..3036883fc9f8 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -924,6 +924,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, struct file *file; struct inode *inode; struct address_space *mapping; + struct block_device *claimed_bdev = NULL; int lo_flags = 0; int error; loff_t size; @@ -942,10 +943,11 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, * here to avoid changing device under exclusive owner. */ if (!(mode & FMODE_EXCL)) { - bdgrab(bdev); - error = blkdev_get(bdev, mode | FMODE_EXCL, loop_set_fd); - if (error) + claimed_bdev = bd_start_claiming(bdev, loop_set_fd); + if (IS_ERR(claimed_bdev)) { + error = PTR_ERR(claimed_bdev); goto out_putf; + } } error = mutex_lock_killable(&loop_ctl_mutex); @@ -1015,15 +1017,15 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, mutex_unlock(&loop_ctl_mutex); if (partscan) loop_reread_partitions(lo, bdev); - if (!(mode & FMODE_EXCL)) - blkdev_put(bdev, mode | FMODE_EXCL); + if (claimed_bdev) + bd_abort_claiming(bdev, claimed_bdev, loop_set_fd); return 0; out_unlock: mutex_unlock(&loop_ctl_mutex); out_bdev: - if (!(mode & FMODE_EXCL)) - blkdev_put(bdev, mode | FMODE_EXCL); + if (claimed_bdev) + bd_abort_claiming(bdev, claimed_bdev, loop_set_fd); out_putf: fput(file); out: diff --git a/fs/block_dev.c b/fs/block_dev.c index c2a85b587922..22591bad9353 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1181,8 +1181,7 @@ static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno) * Pointer to the block device containing @bdev on success, ERR_PTR() * value on failure. */ -static struct block_device *bd_start_claiming(struct block_device *bdev, - void *holder) +struct block_device *bd_start_claiming(struct block_device *bdev, void *holder) { struct gendisk *disk; struct block_device *whole; @@ -1229,6 +1228,62 @@ static struct block_device *bd_start_claiming(struct block_device *bdev, return ERR_PTR(err); } } +EXPORT_SYMBOL(bd_start_claiming); + +static void bd_clear_claiming(struct block_device *whole, void *holder) +{ + lockdep_assert_held(&bdev_lock); + /* tell others that we're done */ + BUG_ON(whole->bd_claiming != holder); + whole->bd_claiming = NULL; + wake_up_bit(&whole->bd_claiming, 0); +} + +/** + * bd_finish_claiming - finish claiming of a block device + * @bdev: block device of interest + * @whole: whole block device (returned from bd_start_claiming()) + * @holder: holder that has claimed @bdev + * + * Finish exclusive open of a block device. Mark the device as exlusively + * open by the holder and wake up all waiters for exclusive open to finish. + */ +void bd_finish_claiming(struct block_device *bdev, struct block_device *whole, + void *holder) +{ + spin_lock(&bdev_lock); + BUG_ON(!bd_may_claim(bdev, whole, holder)); + /* + * Note that for a whole device bd_holders will be incremented twice, + * and bd_holder will be set to bd_may_claim before being set to holder + */ + whole->bd_holders++; + whole->bd_holder = bd_may_claim; + bdev->bd_holders++; + bdev->bd_holder = holder; + bd_clear_claiming(whole, holder); + spin_unlock(&bdev_lock); +} +EXPORT_SYMBOL(bd_finish_claiming); + +/** + * bd_abort_claiming - abort claiming of a block device + * @bdev: block device of interest + * @whole: whole block device (returned from bd_start_claiming()) + * @holder: holder that has claimed @bdev + * + * Abort claiming of a block device when the exclusive open failed. This can be + * also used when exclusive open is not actually desired and we just needed + * to block other exclusive openers for a while. + */ +void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, + void *holder) +{ + spin_lock(&bdev_lock); + bd_clear_claiming(whole, holder); + spin_unlock(&bdev_lock); +} +EXPORT_SYMBOL(bd_abort_claiming); #ifdef CONFIG_SYSFS struct bd_holder_disk { @@ -1698,29 +1753,7 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) /* finish claiming */ mutex_lock(&bdev->bd_mutex); - spin_lock(&bdev_lock); - - if (!res) { - BUG_ON(!bd_may_claim(bdev, whole, holder)); - /* - * Note that for a whole device bd_holders - * will be incremented twice, and bd_holder - * will be set to bd_may_claim before being - * set to holder - */ - whole->bd_holders++; - whole->bd_holder = bd_may_claim; - bdev->bd_holders++; - bdev->bd_holder = holder; - } - - /* tell others that we're done */ - BUG_ON(whole->bd_claiming != holder); - whole->bd_claiming = NULL; - wake_up_bit(&whole->bd_claiming, 0); - - spin_unlock(&bdev_lock); - + bd_finish_claiming(bdev, whole, holder); /* * Block event polling for write claims if requested. Any * write holder makes the write_holder state stick until diff --git a/include/linux/fs.h b/include/linux/fs.h index 56b8e358af5c..997a530ff4e9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2598,6 +2598,12 @@ extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, void *holder); extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder); +extern struct block_device *bd_start_claiming(struct block_device *bdev, + void *holder); +extern void bd_finish_claiming(struct block_device *bdev, + struct block_device *whole, void *holder); +extern void bd_abort_claiming(struct block_device *bdev, + struct block_device *whole, void *holder); extern void blkdev_put(struct block_device *bdev, fmode_t mode); extern int __blkdev_reread_part(struct block_device *bdev); extern int blkdev_reread_part(struct block_device *bdev); From a9446a906f52292c52ecbd5be78eaa4d8395756c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 28 Jul 2019 15:12:52 +0200 Subject: [PATCH 152/277] lib/vdso/32: Remove inconsistent NULL pointer checks The 32bit variants of vdso_clock_gettime()/getres() have a NULL pointer check for the timespec pointer. That's inconsistent vs. 64bit. But the vdso implementation will never be consistent versus the syscall because the only case which it can handle is NULL. Any other invalid pointer will cause a segfault. So special casing NULL is not really useful. Remove it along with the superflouos syscall fallback invocation as that will return -EFAULT anyway. That also gets rid of the dubious typecast which only works because the pointer is NULL. Fixes: 00b26474c2f1 ("lib/vdso: Provide generic VDSO implementation") Signed-off-by: Thomas Gleixner Tested-by: Vincenzo Frascino Reviewed-by: Vincenzo Frascino Reviewed-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20190728131648.587523358@linutronix.de --- lib/vdso/gettimeofday.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 2d1c1f241fd9..e28f5a607a5f 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -115,9 +115,6 @@ __cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res) struct __kernel_timespec ts; int ret; - if (res == NULL) - goto fallback; - ret = __cvdso_clock_gettime(clock, &ts); if (ret == 0) { @@ -126,9 +123,6 @@ __cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res) } return ret; - -fallback: - return clock_gettime_fallback(clock, (struct __kernel_timespec *)res); } static __maybe_unused int @@ -204,10 +198,8 @@ int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res) goto fallback; } - if (res) { - res->tv_sec = 0; - res->tv_nsec = ns; - } + res->tv_sec = 0; + res->tv_nsec = ns; return 0; @@ -221,9 +213,6 @@ __cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res) struct __kernel_timespec ts; int ret; - if (res == NULL) - goto fallback; - ret = __cvdso_clock_getres(clock, &ts); if (ret == 0) { @@ -232,8 +221,5 @@ __cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res) } return ret; - -fallback: - return clock_getres_fallback(clock, (struct __kernel_timespec *)res); } #endif /* VDSO_HAS_CLOCK_GETRES */ From 502a590a170b3b3d0ad998ee0b639ac0b3db1dfa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 28 Jul 2019 15:12:53 +0200 Subject: [PATCH 153/277] lib/vdso: Move fallback invocation to the callers To allow syscall fallbacks using the legacy 32bit syscall for 32bit VDSO builds, move the fallback invocation out into the callers. Split the common code out of __cvdso_clock_gettime/getres() and invoke the syscall fallback in the 64bit and 32bit variants. Preparatory work for using legacy syscalls in 32bit VDSO. No functional change. Fixes: 00b26474c2f1 ("lib/vdso: Provide generic VDSO implementation") Signed-off-by: Thomas Gleixner Tested-by: Vincenzo Frascino Reviewed-by: Andy Lutomirski Reviewed-by: Vincenzo Frascino Link: https://lkml.kernel.org/r/20190728131648.695579736@linutronix.de --- lib/vdso/gettimeofday.c | 53 ++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index e28f5a607a5f..a9e7fd029593 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -51,7 +51,7 @@ static int do_hres(const struct vdso_data *vd, clockid_t clk, ns = vdso_ts->nsec; last = vd->cycle_last; if (unlikely((s64)cycles < 0)) - return clock_gettime_fallback(clk, ts); + return -1; ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult); ns >>= vd->shift; @@ -82,14 +82,14 @@ static void do_coarse(const struct vdso_data *vd, clockid_t clk, } static __maybe_unused int -__cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) +__cvdso_clock_gettime_common(clockid_t clock, struct __kernel_timespec *ts) { const struct vdso_data *vd = __arch_get_vdso_data(); u32 msk; /* Check for negative values or invalid clocks */ if (unlikely((u32) clock >= MAX_CLOCKS)) - goto fallback; + return -1; /* * Convert the clockid to a bitmask and use it to check which @@ -104,9 +104,17 @@ __cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) } else if (msk & VDSO_RAW) { return do_hres(&vd[CS_RAW], clock, ts); } + return -1; +} -fallback: - return clock_gettime_fallback(clock, ts); +static __maybe_unused int +__cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) +{ + int ret = __cvdso_clock_gettime_common(clock, ts); + + if (unlikely(ret)) + return clock_gettime_fallback(clock, ts); + return 0; } static __maybe_unused int @@ -115,9 +123,12 @@ __cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res) struct __kernel_timespec ts; int ret; - ret = __cvdso_clock_gettime(clock, &ts); + ret = __cvdso_clock_gettime_common(clock, &ts); - if (ret == 0) { + if (unlikely(ret)) + ret = clock_gettime_fallback(clock, &ts); + + if (likely(!ret)) { res->tv_sec = ts.tv_sec; res->tv_nsec = ts.tv_nsec; } @@ -163,17 +174,18 @@ static __maybe_unused time_t __cvdso_time(time_t *time) #ifdef VDSO_HAS_CLOCK_GETRES static __maybe_unused -int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res) +int __cvdso_clock_getres_common(clockid_t clock, struct __kernel_timespec *res) { const struct vdso_data *vd = __arch_get_vdso_data(); - u64 ns; + u64 hrtimer_res; u32 msk; - u64 hrtimer_res = READ_ONCE(vd[CS_HRES_COARSE].hrtimer_res); + u64 ns; /* Check for negative values or invalid clocks */ if (unlikely((u32) clock >= MAX_CLOCKS)) - goto fallback; + return -1; + hrtimer_res = READ_ONCE(vd[CS_HRES_COARSE].hrtimer_res); /* * Convert the clockid to a bitmask and use it to check which * clocks are handled in the VDSO directly. @@ -195,16 +207,22 @@ int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res) */ ns = hrtimer_res; } else { - goto fallback; + return -1; } res->tv_sec = 0; res->tv_nsec = ns; return 0; +} -fallback: - return clock_getres_fallback(clock, res); +int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res) +{ + int ret = __cvdso_clock_getres_common(clock, res); + + if (unlikely(ret)) + return clock_getres_fallback(clock, res); + return 0; } static __maybe_unused int @@ -213,13 +231,14 @@ __cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res) struct __kernel_timespec ts; int ret; - ret = __cvdso_clock_getres(clock, &ts); + ret = __cvdso_clock_getres_common(clock, &ts); + if (unlikely(ret)) + ret = clock_getres_fallback(clock, &ts); - if (ret == 0) { + if (likely(!ret)) { res->tv_sec = ts.tv_sec; res->tv_nsec = ts.tv_nsec; } - return ret; } #endif /* VDSO_HAS_CLOCK_GETRES */ From c60a32ea4f459f99b98d383cad3b1ac7cfb3f4be Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 30 Jul 2019 11:38:50 +0200 Subject: [PATCH 154/277] lib/vdso/32: Provide legacy syscall fallbacks To address the regression which causes seccomp to deny applications the access to clock_gettime64() and clock_getres64() syscalls because they are not enabled in the existing filters. That trips over the fact that 32bit VDSOs use the new clock_gettime64() and clock_getres64() syscalls in the fallback path. Add a conditional to invoke the 32bit legacy fallback syscalls instead of the new 64bit variants. The conditional can go away once all architectures are converted. Fixes: 00b26474c2f1 ("lib/vdso: Provide generic VDSO implementation") Signed-off-by: Thomas Gleixner Tested-by: Sean Christopherson Reviewed-by: Sean Christopherson Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1907301134470.1738@nanos.tec.linutronix.de --- lib/vdso/gettimeofday.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index a9e7fd029593..e630e7ff57f1 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -125,14 +125,18 @@ __cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res) ret = __cvdso_clock_gettime_common(clock, &ts); +#ifdef VDSO_HAS_32BIT_FALLBACK + if (unlikely(ret)) + return clock_gettime32_fallback(clock, res); +#else if (unlikely(ret)) ret = clock_gettime_fallback(clock, &ts); +#endif if (likely(!ret)) { res->tv_sec = ts.tv_sec; res->tv_nsec = ts.tv_nsec; } - return ret; } @@ -232,8 +236,14 @@ __cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res) int ret; ret = __cvdso_clock_getres_common(clock, &ts); + +#ifdef VDSO_HAS_32BIT_FALLBACK + if (unlikely(ret)) + return clock_getres32_fallback(clock, res); +#else if (unlikely(ret)) ret = clock_getres_fallback(clock, &ts); +#endif if (likely(!ret)) { res->tv_sec = ts.tv_sec; From d2f5d3fa26196183adb44a413c44caa9872275b4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 28 Jul 2019 15:12:55 +0200 Subject: [PATCH 155/277] x86/vdso/32: Use 32bit syscall fallback The generic VDSO implementation uses the Y2038 safe clock_gettime64() and clock_getres_time64() syscalls as fallback for 32bit VDSO. This breaks seccomp setups because these syscalls might be not (yet) allowed. Implement the 32bit variants which use the legacy syscalls and select the variant in the core library. The 64bit time variants are not removed because they are required for the time64 based vdso accessors. Fixes: 7ac870747988 ("x86/vdso: Switch to generic vDSO implementation") Reported-by: Sean Christopherson Reported-by: Paul Bolle Suggested-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Vincenzo Frascino Reviewed-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20190728131648.879156507@linutronix.de --- arch/x86/include/asm/vdso/gettimeofday.h | 36 ++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index ae91429129a6..ba71a63cdac4 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -96,6 +96,8 @@ long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) #else +#define VDSO_HAS_32BIT_FALLBACK 1 + static __always_inline long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) { @@ -113,6 +115,23 @@ long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) return ret; } +static __always_inline +long clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_gettime), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + static __always_inline long gettimeofday_fallback(struct __kernel_old_timeval *_tv, struct timezone *_tz) @@ -148,6 +167,23 @@ clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) return ret; } +static __always_inline +long clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_getres), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + #endif #ifdef CONFIG_PARAVIRT_CLOCK From 33a58980ff3cc5dbf0bb1b325746ac69223eda0b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 28 Jul 2019 15:12:56 +0200 Subject: [PATCH 156/277] arm64: compat: vdso: Use legacy syscalls as fallback The generic VDSO implementation uses the Y2038 safe clock_gettime64() and clock_getres_time64() syscalls as fallback for 32bit VDSO. This breaks seccomp setups because these syscalls might be not (yet) allowed. Implement the 32bit variants which use the legacy syscalls and select the variant in the core library. The 64bit time variants are not removed because they are required for the time64 based vdso accessors. Fixes: 00b26474c2f1 ("lib/vdso: Provide generic VDSO implementation") Reported-by: Sean Christopherson Reported-by: Paul Bolle Suggested-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Tested-by: Vincenzo Frascino Reviewed-by: Vincenzo Frascino Link: https://lkml.kernel.org/r/20190728131648.971361611@linutronix.de --- .../include/asm/vdso/compat_gettimeofday.h | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index f4812777f5c5..c50ee1b7d5cd 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -16,6 +16,8 @@ #define VDSO_HAS_CLOCK_GETRES 1 +#define VDSO_HAS_32BIT_FALLBACK 1 + static __always_inline int gettimeofday_fallback(struct __kernel_old_timeval *_tv, struct timezone *_tz) @@ -51,6 +53,23 @@ long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) return ret; } +static __always_inline +long clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) +{ + register struct old_timespec32 *ts asm("r1") = _ts; + register clockid_t clkid asm("r0") = _clkid; + register long ret asm ("r0"); + register long nr asm("r7") = __NR_compat_clock_gettime; + + asm volatile( + " swi #0\n" + : "=r" (ret) + : "r" (clkid), "r" (ts), "r" (nr) + : "memory"); + + return ret; +} + static __always_inline int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) { @@ -72,6 +91,27 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) return ret; } +static __always_inline +int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) +{ + register struct old_timespec32 *ts asm("r1") = _ts; + register clockid_t clkid asm("r0") = _clkid; + register long ret asm ("r0"); + register long nr asm("r7") = __NR_compat_clock_getres; + + /* The checks below are required for ABI consistency with arm */ + if ((_clkid >= MAX_CLOCKS) && (_ts == NULL)) + return -EINVAL; + + asm volatile( + " swi #0\n" + : "=r" (ret) + : "r" (clkid), "r" (ts), "r" (nr) + : "memory"); + + return ret; +} + static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) { u64 res; From 5348deb138abb90ca8f728356772e38abc791cf9 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Tue, 30 Jul 2019 17:07:08 +0530 Subject: [PATCH 157/277] dm table: fix dax_dev NULL dereference in device_synchronous() If a device doesn't support DAX its 'dax_dev' is NULL. Fix device_synchronous() to first check if dax_dev is NULL before dereferencing it. Fixes: 2e9ee0955d3c ("dm: enable synchronous dax") Reported-by: jencce.kernel@gmail.com Signed-off-by: Pankaj Gupta Acked-by: Dan Williams Signed-off-by: Mike Snitzer --- drivers/md/dm-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index caaee8032afe..12857beaa7f9 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -894,7 +894,7 @@ int device_supports_dax(struct dm_target *ti, struct dm_dev *dev, static int device_synchronous(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { - return dax_synchronous(dev->dax_dev); + return dev->dax_dev && dax_synchronous(dev->dax_dev); } bool dm_table_supports_dax(struct dm_table *t, From 9c50a98f55f4b123227eebb25009524d20bc4c2a Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 30 Jul 2019 14:39:43 -0400 Subject: [PATCH 158/277] dm table: fix various whitespace issues with recent DAX code Also, rename device_synchronous to device_dax_synchronous. Signed-off-by: Mike Snitzer --- drivers/md/dm-table.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 12857beaa7f9..7b6c3ee9e755 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -882,23 +882,23 @@ EXPORT_SYMBOL_GPL(dm_table_set_type); /* validate the dax capability of the target device span */ int device_supports_dax(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) + sector_t start, sector_t len, void *data) { int blocksize = *(int *) data; return generic_fsdax_supported(dev->dax_dev, dev->bdev, blocksize, - start, len); + start, len); } /* Check devices support synchronous DAX */ -static int device_synchronous(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) +static int device_dax_synchronous(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) { return dev->dax_dev && dax_synchronous(dev->dax_dev); } bool dm_table_supports_dax(struct dm_table *t, - iterate_devices_callout_fn iterate_fn, int *blocksize) + iterate_devices_callout_fn iterate_fn, int *blocksize) { struct dm_target *ti; unsigned i; @@ -911,7 +911,7 @@ bool dm_table_supports_dax(struct dm_table *t, return false; if (!ti->type->iterate_devices || - !ti->type->iterate_devices(ti, iterate_fn, blocksize)) + !ti->type->iterate_devices(ti, iterate_fn, blocksize)) return false; } @@ -1921,7 +1921,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, if (dm_table_supports_dax(t, device_supports_dax, &page_size)) { blk_queue_flag_set(QUEUE_FLAG_DAX, q); - if (dm_table_supports_dax(t, device_synchronous, NULL)) + if (dm_table_supports_dax(t, device_dax_synchronous, NULL)) set_dax_synchronous(t->md->dax_dev); } else From b1d45c23284e55a379f85554a27a548b7988d47a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 20 Jul 2019 19:39:43 +0900 Subject: [PATCH 159/277] tracing: Fix header include guards in trace event headers These include guards are broken. Match the #if !define() and #define lines so that they work correctly. Link: http://lkml.kernel.org/r/20190720103943.16982-1-yamada.masahiro@socionext.com Fixes: f54d1867005c3 ("dma-buf: Rename struct fence to dma_fence") Fixes: 2e26ca7150a4f ("tracing: Fix tracepoint.h DECLARE_TRACE() to allow more than one header") Fixes: e543002f77f46 ("qdisc: add tracepoint qdisc:qdisc_dequeue for dequeued SKBs") Fixes: 95f295f9fe081 ("dmaengine: tegra: add tracepoints to driver") Signed-off-by: Masahiro Yamada Signed-off-by: Steven Rostedt (VMware) --- include/trace/events/dma_fence.h | 2 +- include/trace/events/napi.h | 4 ++-- include/trace/events/qdisc.h | 4 ++-- include/trace/events/tegra_apb_dma.h | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/trace/events/dma_fence.h b/include/trace/events/dma_fence.h index 2212adda8f77..64e92d56c6a8 100644 --- a/include/trace/events/dma_fence.h +++ b/include/trace/events/dma_fence.h @@ -2,7 +2,7 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM dma_fence -#if !defined(_TRACE_FENCE_H) || defined(TRACE_HEADER_MULTI_READ) +#if !defined(_TRACE_DMA_FENCE_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_DMA_FENCE_H #include diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h index f3a12566bed0..6678cf8b235b 100644 --- a/include/trace/events/napi.h +++ b/include/trace/events/napi.h @@ -3,7 +3,7 @@ #define TRACE_SYSTEM napi #if !defined(_TRACE_NAPI_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_NAPI_H_ +#define _TRACE_NAPI_H #include #include @@ -38,7 +38,7 @@ TRACE_EVENT(napi_poll, #undef NO_DEV -#endif /* _TRACE_NAPI_H_ */ +#endif /* _TRACE_NAPI_H */ /* This part must be outside protection */ #include diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h index 60d0d8bd336d..0d1a9ebf55ba 100644 --- a/include/trace/events/qdisc.h +++ b/include/trace/events/qdisc.h @@ -2,7 +2,7 @@ #define TRACE_SYSTEM qdisc #if !defined(_TRACE_QDISC_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_QDISC_H_ +#define _TRACE_QDISC_H #include #include @@ -44,7 +44,7 @@ TRACE_EVENT(qdisc_dequeue, __entry->txq_state, __entry->packets, __entry->skbaddr ) ); -#endif /* _TRACE_QDISC_H_ */ +#endif /* _TRACE_QDISC_H */ /* This part must be outside protection */ #include diff --git a/include/trace/events/tegra_apb_dma.h b/include/trace/events/tegra_apb_dma.h index 0818f6286110..971cd02d2daf 100644 --- a/include/trace/events/tegra_apb_dma.h +++ b/include/trace/events/tegra_apb_dma.h @@ -1,5 +1,5 @@ #if !defined(_TRACE_TEGRA_APB_DMA_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_TEGRA_APM_DMA_H +#define _TRACE_TEGRA_APB_DMA_H #include #include @@ -55,7 +55,7 @@ TRACE_EVENT(tegra_dma_isr, TP_printk("%s: irq %d\n", __get_str(chan), __entry->irq) ); -#endif /* _TRACE_TEGRADMA_H */ +#endif /* _TRACE_TEGRA_APB_DMA_H */ /* This part must be outside protection */ #include From 6c77221df96177da0520847ce91e33f539fb8b2d Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 30 Jul 2019 22:08:50 +0800 Subject: [PATCH 160/277] fgraph: Remove redundant ftrace_graph_notrace_addr() test We already have tested it before. The second one should be removed. With this change, the performance should have little improvement. Link: http://lkml.kernel.org/r/20190730140850.7927-1-changbin.du@gmail.com Cc: stable@vger.kernel.org Fixes: 9cd2992f2d6c ("fgraph: Have set_graph_notrace only affect function_graph tracer") Signed-off-by: Changbin Du Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_functions_graph.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 69ebf3c2f1b5..78af97163147 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -137,6 +137,13 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) return 0; + /* + * Do not trace a function if it's filtered by set_graph_notrace. + * Make the index of ret stack negative to indicate that it should + * ignore further functions. But it needs its own ret stack entry + * to recover the original index in order to continue tracing after + * returning from the function. + */ if (ftrace_graph_notrace_addr(trace->func)) { trace_recursion_set(TRACE_GRAPH_NOTRACE_BIT); /* @@ -155,16 +162,6 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) if (ftrace_graph_ignore_irqs()) return 0; - /* - * Do not trace a function if it's filtered by set_graph_notrace. - * Make the index of ret stack negative to indicate that it should - * ignore further functions. But it needs its own ret stack entry - * to recover the original index in order to continue tracing after - * returning from the function. - */ - if (ftrace_graph_notrace_addr(trace->func)) - return 1; - /* * Stop here if tracing_threshold is set. We only write function return * events to the ring buffer. From d65848657c3da5c0d4b685f823d0230f151ab34e Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Tue, 23 Jul 2019 10:18:01 -0400 Subject: [PATCH 161/277] drm/amdkfd: Fix byte align on VegaM This was missed during the addition of VegaM support Reviewed-by: Alex Deucher Signed-off-by: Kent Russell Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 1d3ee9c42f7e..6a5c96e519b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1140,7 +1140,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( adev->asic_type != CHIP_FIJI && adev->asic_type != CHIP_POLARIS10 && adev->asic_type != CHIP_POLARIS11 && - adev->asic_type != CHIP_POLARIS12) ? + adev->asic_type != CHIP_POLARIS12 && + adev->asic_type != CHIP_VEGAM) ? VI_BO_SIZE_ALIGN : 1; mapping_flags = AMDGPU_VM_PAGE_READABLE; From 2c0f07fe3ca57c8fb4ee179c9fb50d6eba75349e Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Mon, 3 Jun 2019 15:58:31 +0800 Subject: [PATCH 162/277] drm/amd/powerplay: add callback function of get_thermal_temperature_range 1. the thermal temperature is asic related data, move the code logic to xxx_ppt.c. 2. replace data structure PP_TemperatureRange with smu_temperature_range. 3. change temperature uint from temp*1000 to temp (temperature uint). Signed-off-by: Kevin Wang Signed-off-by: Kenneth Feng Acked-by: Huang Rui Signed-off-by: Alex Deucher --- .../gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 1 - drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 17 ++++++++++ drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 18 ++++++---- drivers/gpu/drm/amd/powerplay/vega20_ppt.c | 34 ++++++------------- 4 files changed, 40 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index 22e46a289a16..208e6711d506 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -429,7 +429,6 @@ struct smu_table_context struct smu_table *tables; uint32_t table_count; struct smu_table memory_pool; - uint16_t software_shutdown_temp; uint8_t thermal_controller_type; uint16_t TDPODLimit; diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index 4aaad255a288..3f68268a8733 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -1620,6 +1620,22 @@ static int navi10_set_performance_level(struct smu_context *smu, enum amd_dpm_fo return ret; } +static int navi10_get_thermal_temperature_range(struct smu_context *smu, + struct smu_temperature_range *range) +{ + struct smu_table_context *table_context = &smu->smu_table; + struct smu_11_0_powerplay_table *powerplay_table = table_context->power_play_table; + + if (!range || !powerplay_table) + return -EINVAL; + + /* The unit is temperature */ + range->min = 0; + range->max = powerplay_table->software_shutdown_temp; + + return 0; +} + static const struct pptable_funcs navi10_ppt_funcs = { .tables_init = navi10_tables_init, .alloc_dpm_context = navi10_allocate_dpm_context, @@ -1657,6 +1673,7 @@ static const struct pptable_funcs navi10_ppt_funcs = { .get_ppfeature_status = navi10_get_ppfeature_status, .set_ppfeature_status = navi10_set_ppfeature_status, .set_performance_level = navi10_set_performance_level, + .get_thermal_temperature_range = navi10_get_thermal_temperature_range, }; void navi10_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index caca9091bfcc..1ecb409e3bed 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -1124,10 +1124,8 @@ static int smu_v11_0_set_thermal_range(struct smu_context *smu, struct smu_temperature_range *range) { struct amdgpu_device *adev = smu->adev; - int low = SMU_THERMAL_MINIMUM_ALERT_TEMP * - SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; - int high = SMU_THERMAL_MAXIMUM_ALERT_TEMP * - SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + int low = SMU_THERMAL_MINIMUM_ALERT_TEMP; + int high = SMU_THERMAL_MAXIMUM_ALERT_TEMP; uint32_t val; if (!range) @@ -1138,6 +1136,9 @@ static int smu_v11_0_set_thermal_range(struct smu_context *smu, if (high > range->max) high = range->max; + low = max(SMU_THERMAL_MINIMUM_ALERT_TEMP, range->min); + high = min(SMU_THERMAL_MAXIMUM_ALERT_TEMP, range->max); + if (low > high) return -EINVAL; @@ -1146,8 +1147,8 @@ static int smu_v11_0_set_thermal_range(struct smu_context *smu, val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1); val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_INTH_MASK, 0); val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_INTL_MASK, 0); - val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high / SMU_TEMPERATURE_UNITS_PER_CENTIGRADES)); - val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low / SMU_TEMPERATURE_UNITS_PER_CENTIGRADES)); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high & 0xff)); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low & 0xff)); val = val & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK); WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL, val); @@ -1186,7 +1187,10 @@ static int smu_v11_0_start_thermal_control(struct smu_context *smu) if (!smu->pm_enabled) return ret; + ret = smu_get_thermal_temperature_range(smu, &range); + if (ret) + return ret; if (smu->smu_table.thermal_controller_type) { ret = smu_v11_0_set_thermal_range(smu, &range); @@ -1211,6 +1215,8 @@ static int smu_v11_0_start_thermal_control(struct smu_context *smu) adev->pm.dpm.thermal.min_mem_temp = range.mem_min; adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max; adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max; + adev->pm.dpm.thermal.min_temp = range.min * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + adev->pm.dpm.thermal.max_temp = range.max * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; return ret; } diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c index dc139a6feeb1..dd6fd1c8bf24 100644 --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c @@ -450,7 +450,6 @@ static int vega20_store_powerplay_table(struct smu_context *smu) memcpy(table_context->driver_pptable, &powerplay_table->smcPPTable, sizeof(PPTable_t)); - table_context->software_shutdown_temp = powerplay_table->usSoftwareShutdownTemp; table_context->thermal_controller_type = powerplay_table->ucThermalControllerType; table_context->TDPODLimit = le32_to_cpu(powerplay_table->OverDrive8Table.ODSettingsMax[ATOM_VEGA20_ODSETTING_POWERPERCENTAGE]); @@ -3234,35 +3233,24 @@ static int vega20_set_watermarks_table(struct smu_context *smu, return 0; } -static const struct smu_temperature_range vega20_thermal_policy[] = -{ - {-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000}, - { 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000}, -}; - static int vega20_get_thermal_temperature_range(struct smu_context *smu, struct smu_temperature_range *range) { - + struct smu_table_context *table_context = &smu->smu_table; + ATOM_Vega20_POWERPLAYTABLE *powerplay_table = table_context->power_play_table; PPTable_t *pptable = smu->smu_table.driver_pptable; - if (!range) + if (!range || !powerplay_table) return -EINVAL; - memcpy(range, &vega20_thermal_policy[0], sizeof(struct smu_temperature_range)); - - range->max = pptable->TedgeLimit * - SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; - range->edge_emergency_max = (pptable->TedgeLimit + CTF_OFFSET_EDGE) * - SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; - range->hotspot_crit_max = pptable->ThotspotLimit * - SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; - range->hotspot_emergency_max = (pptable->ThotspotLimit + CTF_OFFSET_HOTSPOT) * - SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; - range->mem_crit_max = pptable->ThbmLimit * - SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; - range->mem_emergency_max = (pptable->ThbmLimit + CTF_OFFSET_HBM)* - SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + /* The unit is temperature */ + range->min = 0; + range->max = powerplay_table->usSoftwareShutdownTemp; + range->edge_emergency_max = (pptable->TedgeLimit + CTF_OFFSET_EDGE); + range->hotspot_crit_max = pptable->ThotspotLimit; + range->hotspot_emergency_max = (pptable->ThotspotLimit + CTF_OFFSET_HOTSPOT); + range->mem_crit_max = pptable->ThbmLimit; + range->mem_emergency_max = (pptable->ThbmLimit + CTF_OFFSET_HBM); return 0; From 45a660143bf90a35ab64df663b88d82c02a17091 Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Tue, 23 Jul 2019 19:56:52 +0800 Subject: [PATCH 163/277] drm/amd/powerplay: fix temperature granularity error in smu11 in this patch, drm/amd/powerplay: add callback function of get_thermal_temperature_range the driver missed temperature granularity change on other temperature. Signed-off-by: Kevin Wang Reviewed-by: Evan Quan Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index 1ecb409e3bed..ac5b26228e75 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -1206,15 +1206,15 @@ static int smu_v11_0_start_thermal_control(struct smu_context *smu) return ret; } - adev->pm.dpm.thermal.min_temp = range.min; - adev->pm.dpm.thermal.max_temp = range.max; - adev->pm.dpm.thermal.max_edge_emergency_temp = range.edge_emergency_max; - adev->pm.dpm.thermal.min_hotspot_temp = range.hotspot_min; - adev->pm.dpm.thermal.max_hotspot_crit_temp = range.hotspot_crit_max; - adev->pm.dpm.thermal.max_hotspot_emergency_temp = range.hotspot_emergency_max; - adev->pm.dpm.thermal.min_mem_temp = range.mem_min; - adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max; - adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max; + adev->pm.dpm.thermal.min_temp = range.min * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + adev->pm.dpm.thermal.max_temp = range.max * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + adev->pm.dpm.thermal.max_edge_emergency_temp = range.edge_emergency_max * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + adev->pm.dpm.thermal.min_hotspot_temp = range.hotspot_min * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + adev->pm.dpm.thermal.max_hotspot_crit_temp = range.hotspot_crit_max * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + adev->pm.dpm.thermal.max_hotspot_emergency_temp = range.hotspot_emergency_max * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + adev->pm.dpm.thermal.min_mem_temp = range.mem_min * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; adev->pm.dpm.thermal.min_temp = range.min * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; adev->pm.dpm.thermal.max_temp = range.max * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; From 090efd946d00cd23ce4ac25bce125f408b704d7d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 25 Jul 2019 22:28:58 -0500 Subject: [PATCH 164/277] drm/amdgpu/powerplay: use proper revision id for navi The PCI revision id determines the sku. Reviewed-by: Feifei Xu Reviewed-by: Kevin Wang Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index 3f68268a8733..be592d22bdcc 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -23,6 +23,7 @@ #include "pp_debug.h" #include +#include #include "amdgpu.h" #include "amdgpu_smu.h" #include "atomfirmware.h" @@ -1573,7 +1574,7 @@ static int navi10_set_peak_clock_by_device(struct smu_context *smu) uint32_t sclk_freq = 0, uclk_freq = 0; uint32_t uclk_level = 0; - switch (adev->rev_id) { + switch (adev->pdev->revision) { case 0xf0: /* XTX */ case 0xc0: sclk_freq = NAVI10_PEAK_SCLK_XTX; From 479156f2e5540077377a823eaf5a4263bd329063 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 25 Jul 2019 12:10:34 +0800 Subject: [PATCH 165/277] drm/amd/powerplay: fix null pointer dereference around dpm state relates DPM state relates are not supported on the new SW SMU ASICs. But still it's not OK to trigger null pointer dereference on accessing them. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 18 +++++++++++++----- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 3 ++- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 03ca8c69114f..8c90baca07b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -159,12 +159,16 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; enum amd_pm_state_type pm; - if (is_support_sw_smu(adev) && adev->smu.ppt_funcs->get_current_power_state) - pm = amdgpu_smu_get_current_power_state(adev); - else if (adev->powerplay.pp_funcs->get_current_power_state) + if (is_support_sw_smu(adev)) { + if (adev->smu.ppt_funcs->get_current_power_state) + pm = amdgpu_smu_get_current_power_state(adev); + else + pm = adev->pm.dpm.user_state; + } else if (adev->powerplay.pp_funcs->get_current_power_state) { pm = amdgpu_dpm_get_current_power_state(adev); - else + } else { pm = adev->pm.dpm.user_state; + } return snprintf(buf, PAGE_SIZE, "%s\n", (pm == POWER_STATE_TYPE_BATTERY) ? "battery" : @@ -191,7 +195,11 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev, goto fail; } - if (adev->powerplay.pp_funcs->dispatch_tasks) { + if (is_support_sw_smu(adev)) { + mutex_lock(&adev->pm.mutex); + adev->pm.dpm.user_state = state; + mutex_unlock(&adev->pm.mutex); + } else if (adev->powerplay.pp_funcs->dispatch_tasks) { amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_ENABLE_USER_STATE, &state); } else { mutex_lock(&adev->pm.mutex); diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index c097113c3976..88ed85e3d233 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -306,7 +306,8 @@ int smu_get_power_num_states(struct smu_context *smu, /* not support power state */ memset(state_info, 0, sizeof(struct pp_states_info)); - state_info->nums = 0; + state_info->nums = 1; + state_info->states[0] = POWER_STATE_TYPE_DEFAULT; return 0; } From f0bc1ee473fefd4d9f2ace9fad1cefdc0b7f6fdd Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 25 Jul 2019 10:12:42 +0800 Subject: [PATCH 166/277] drm/amd/powerplay: enable SW SMU reset functionality Move SMU irq handler register to sw_init as that's totally software related. Otherwise, it will prevent SMU reset working. Signed-off-by: Evan Quan Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 88ed85e3d233..93cd969e5cf5 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -724,6 +724,12 @@ static int smu_sw_init(void *handle) return ret; } + ret = smu_register_irq_handler(smu); + if (ret) { + pr_err("Failed to register smc irq handler!\n"); + return ret; + } + return 0; } @@ -733,6 +739,9 @@ static int smu_sw_fini(void *handle) struct smu_context *smu = &adev->smu; int ret; + kfree(smu->irq_source); + smu->irq_source = NULL; + ret = smu_smc_table_sw_fini(smu); if (ret) { pr_err("Failed to sw fini smc table!\n"); @@ -1089,10 +1098,6 @@ static int smu_hw_init(void *handle) if (ret) goto failed; - ret = smu_register_irq_handler(smu); - if (ret) - goto failed; - if (!smu->pm_enabled) adev->pm.dpm_enabled = false; else @@ -1122,9 +1127,6 @@ static int smu_hw_fini(void *handle) kfree(table_context->overdrive_table); table_context->overdrive_table = NULL; - kfree(smu->irq_source); - smu->irq_source = NULL; - ret = smu_fini_fb_allocations(smu); if (ret) return ret; From 8d1502f629c9966743de45744f4c1ba93a57d105 Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Wed, 31 Jul 2019 00:04:56 +0530 Subject: [PATCH 167/277] xen/gntdev.c: Replace vm_map_pages() with vm_map_pages_zero() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'commit df9bde015a72 ("xen/gntdev.c: convert to use vm_map_pages()")' breaks gntdev driver. If vma->vm_pgoff > 0, vm_map_pages() will: - use map->pages starting at vma->vm_pgoff instead of 0 - verify map->count against vma_pages()+vma->vm_pgoff instead of just vma_pages(). In practice, this breaks using a single gntdev FD for mapping multiple grants. relevant strace output: [pid 857] ioctl(7, IOCTL_GNTDEV_MAP_GRANT_REF, 0x7ffd3407b6d0) = 0 [pid 857] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, 7, 0) = 0x777f1211b000 [pid 857] ioctl(7, IOCTL_GNTDEV_SET_UNMAP_NOTIFY, 0x7ffd3407b710) = 0 [pid 857] ioctl(7, IOCTL_GNTDEV_MAP_GRANT_REF, 0x7ffd3407b6d0) = 0 [pid 857] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, 7, 0x1000) = -1 ENXIO (No such device or address) details here: https://github.com/QubesOS/qubes-issues/issues/5199 The reason is -> ( copying Marek's word from discussion) vma->vm_pgoff is used as index passed to gntdev_find_map_index. It's basically using this parameter for "which grant reference to map". map struct returned by gntdev_find_map_index() describes just the pages to be mapped. Specifically map->pages[0] should be mapped at vma->vm_start, not vma->vm_start+vma->vm_pgoff*PAGE_SIZE. When trying to map grant with index (aka vma->vm_pgoff) > 1, __vm_map_pages() will refuse to map it because it will expect map->count to be at least vma_pages(vma)+vma->vm_pgoff, while it is exactly vma_pages(vma). Converting vm_map_pages() to use vm_map_pages_zero() will fix the problem. Marek has tested and confirmed the same. Cc: stable@vger.kernel.org # v5.2+ Fixes: df9bde015a72 ("xen/gntdev.c: convert to use vm_map_pages()") Reported-by: Marek Marczykowski-Górecki Signed-off-by: Souptick Joarder Tested-by: Marek Marczykowski-Górecki Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- drivers/xen/gntdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 4c339c7e66e5..a446a7221e13 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -1143,7 +1143,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) goto out_put_map; if (!use_ptemod) { - err = vm_map_pages(vma, map->pages, map->count); + err = vm_map_pages_zero(vma, map->pages, map->count); if (err) goto out_put_map; } else { From a78d14a31666c636a9e00a589032119fb59e3b94 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 22 Jul 2019 09:46:29 +0200 Subject: [PATCH 168/277] xen: avoid link error on ARM Building the privcmd code as a loadable module on ARM, we get a link error due to the private cache management functions: ERROR: "__sync_icache_dcache" [drivers/xen/xen-privcmd.ko] undefined! Move the code into a new that is always built in when Xen is enabled, as suggested by Juergen Gross and Boris Ostrovsky. Signed-off-by: Arnd Bergmann Reviewed-by: Stefano Stabellini Signed-off-by: Juergen Gross --- drivers/xen/privcmd.c | 35 +++++------------------------------ drivers/xen/xlate_mmu.c | 32 ++++++++++++++++++++++++++++++++ include/xen/xen-ops.h | 3 +++ 3 files changed, 40 insertions(+), 30 deletions(-) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 2f5ce7230a43..c6070e70dd73 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -724,25 +724,6 @@ static long privcmd_ioctl_restrict(struct file *file, void __user *udata) return 0; } -struct remap_pfn { - struct mm_struct *mm; - struct page **pages; - pgprot_t prot; - unsigned long i; -}; - -static int remap_pfn_fn(pte_t *ptep, unsigned long addr, void *data) -{ - struct remap_pfn *r = data; - struct page *page = r->pages[r->i]; - pte_t pte = pte_mkspecial(pfn_pte(page_to_pfn(page), r->prot)); - - set_pte_at(r->mm, addr, ptep, pte); - r->i++; - - return 0; -} - static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata) { struct privcmd_data *data = file->private_data; @@ -774,7 +755,8 @@ static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata) goto out; } - if (xen_feature(XENFEAT_auto_translated_physmap)) { + if (IS_ENABLED(CONFIG_XEN_AUTO_XLATE) && + xen_feature(XENFEAT_auto_translated_physmap)) { unsigned int nr = DIV_ROUND_UP(kdata.num, XEN_PFN_PER_PAGE); struct page **pages; unsigned int i; @@ -808,16 +790,9 @@ static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata) if (rc) goto out; - if (xen_feature(XENFEAT_auto_translated_physmap)) { - struct remap_pfn r = { - .mm = vma->vm_mm, - .pages = vma->vm_private_data, - .prot = vma->vm_page_prot, - }; - - rc = apply_to_page_range(r.mm, kdata.addr, - kdata.num << PAGE_SHIFT, - remap_pfn_fn, &r); + if (IS_ENABLED(CONFIG_XEN_AUTO_XLATE) && + xen_feature(XENFEAT_auto_translated_physmap)) { + rc = xen_remap_vma_range(vma, kdata.addr, kdata.num << PAGE_SHIFT); } else { unsigned int domid = (xdata.flags & XENMEM_rsrc_acq_caller_owned) ? diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c index ba883a80b3c0..7b1077f0abcb 100644 --- a/drivers/xen/xlate_mmu.c +++ b/drivers/xen/xlate_mmu.c @@ -262,3 +262,35 @@ int __init xen_xlate_map_ballooned_pages(xen_pfn_t **gfns, void **virt, return 0; } EXPORT_SYMBOL_GPL(xen_xlate_map_ballooned_pages); + +struct remap_pfn { + struct mm_struct *mm; + struct page **pages; + pgprot_t prot; + unsigned long i; +}; + +static int remap_pfn_fn(pte_t *ptep, unsigned long addr, void *data) +{ + struct remap_pfn *r = data; + struct page *page = r->pages[r->i]; + pte_t pte = pte_mkspecial(pfn_pte(page_to_pfn(page), r->prot)); + + set_pte_at(r->mm, addr, ptep, pte); + r->i++; + + return 0; +} + +/* Used by the privcmd module, but has to be built-in on ARM */ +int xen_remap_vma_range(struct vm_area_struct *vma, unsigned long addr, unsigned long len) +{ + struct remap_pfn r = { + .mm = vma->vm_mm, + .pages = vma->vm_private_data, + .prot = vma->vm_page_prot, + }; + + return apply_to_page_range(vma->vm_mm, addr, len, remap_pfn_fn, &r); +} +EXPORT_SYMBOL_GPL(xen_remap_vma_range); diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 4969817124a8..98b30c1613b2 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -109,6 +109,9 @@ static inline int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma, } #endif +int xen_remap_vma_range(struct vm_area_struct *vma, unsigned long addr, + unsigned long len); + /* * xen_remap_domain_gfn_array() - map an array of foreign frames by gfn * @vma: VMA to map the pages into From 67d0859e2758ef992fd32499747ce4b1038a63c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 30 Jul 2019 11:17:03 +0200 Subject: [PATCH 169/277] drm/amdgpu: fix error handling in amdgpu_cs_process_fence_dep MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We always need to drop the ctx reference and should check for errors first and then dereference the fence pointer. Signed-off-by: Christian König Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index e069de8b54e6..4e4094f842e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1044,29 +1044,27 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, return r; } - fence = amdgpu_ctx_get_fence(ctx, entity, - deps[i].handle); + fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle); + amdgpu_ctx_put(ctx); + + if (IS_ERR(fence)) + return PTR_ERR(fence); + else if (!fence) + continue; if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) { - struct drm_sched_fence *s_fence = to_drm_sched_fence(fence); + struct drm_sched_fence *s_fence; struct dma_fence *old = fence; + s_fence = to_drm_sched_fence(fence); fence = dma_fence_get(&s_fence->scheduled); dma_fence_put(old); } - if (IS_ERR(fence)) { - r = PTR_ERR(fence); - amdgpu_ctx_put(ctx); + r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true); + dma_fence_put(fence); + if (r) return r; - } else if (fence) { - r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, - true); - dma_fence_put(fence); - amdgpu_ctx_put(ctx); - if (r) - return r; - } } return 0; } From 929e571c04c285861e0bb049a396a2bdaea63282 Mon Sep 17 00:00:00 2001 From: Wang Xiayang Date: Sat, 27 Jul 2019 17:30:30 +0800 Subject: [PATCH 170/277] drm/amdgpu: fix a potential information leaking bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Coccinelle reports a path that the array "data" is never initialized. The path skips the checks in the conditional branches when either of callback functions, read_wave_vgprs and read_wave_sgprs, is not registered. Later, the uninitialized "data" array is read in the while-loop below and passed to put_user(). Fix the path by allocating the array with kcalloc(). The patch is simplier than adding a fall-back branch that explicitly calls memset(data, 0, ...). Also it does not need the multiplication 1024*sizeof(*data) as the size parameter for memset() though there is no risk of integer overflow. Signed-off-by: Wang Xiayang Reviewed-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 6d54decef7f8..5652cc72ed3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -707,7 +707,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, thread = (*pos & GENMASK_ULL(59, 52)) >> 52; bank = (*pos & GENMASK_ULL(61, 60)) >> 60; - data = kmalloc_array(1024, sizeof(*data), GFP_KERNEL); + data = kcalloc(1024, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; From a02709818f397e7ed7a0943d65a49d54b2752626 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 22 Jul 2019 09:51:59 +0800 Subject: [PATCH 171/277] drm/amd/powerplay: add new sensor type for VCN powergate status VCN is widely used in new ASICs and different from tranditional UVD and VCE. Signed-off-by: Evan Quan Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/kgd_pp_interface.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 9f661bf96ed0..5b1ebb7f995a 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -123,6 +123,7 @@ enum amd_pp_sensors { AMDGPU_PP_SENSOR_ENABLED_SMC_FEATURES_MASK, AMDGPU_PP_SENSOR_MIN_FAN_RPM, AMDGPU_PP_SENSOR_MAX_FAN_RPM, + AMDGPU_PP_SENSOR_VCN_POWER_STATE, }; enum amd_pp_task { From 201cd702b7012ecee2a613e09b6a227ca0e12504 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 22 Jul 2019 09:55:36 +0800 Subject: [PATCH 172/277] drm/amd/powerplay: support VCN powergate status retrieval on Raven Enable VCN powergate status report on Raven. Signed-off-by: Evan Quan Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c index e32ae9d3373c..18e780f566fa 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c @@ -1111,6 +1111,7 @@ static int smu10_thermal_get_temperature(struct pp_hwmgr *hwmgr) static int smu10_read_sensor(struct pp_hwmgr *hwmgr, int idx, void *value, int *size) { + struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); uint32_t sclk, mclk; int ret = 0; @@ -1132,6 +1133,10 @@ static int smu10_read_sensor(struct pp_hwmgr *hwmgr, int idx, case AMDGPU_PP_SENSOR_GPU_TEMP: *((uint32_t *)value) = smu10_thermal_get_temperature(hwmgr); break; + case AMDGPU_PP_SENSOR_VCN_POWER_STATE: + *(uint32_t *)value = smu10_data->vcn_power_gated ? 0 : 1; + *size = 4; + break; default: ret = -EINVAL; break; @@ -1175,18 +1180,22 @@ static int smu10_powergate_sdma(struct pp_hwmgr *hwmgr, bool gate) static void smu10_powergate_vcn(struct pp_hwmgr *hwmgr, bool bgate) { + struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); + if (bgate) { amdgpu_device_ip_set_powergating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_GATE); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_PowerDownVcn, 0); + smu10_data->vcn_power_gated = true; } else { smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_PowerUpVcn, 0); amdgpu_device_ip_set_powergating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_UNGATE); + smu10_data->vcn_power_gated = false; } } From e21e3581e2a1df75abb96b545be15e526bd8c1c6 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 22 Jul 2019 09:57:27 +0800 Subject: [PATCH 173/277] drm/amd/powerplay: support VCN powergate status retrieval for SW SMU Commonly used for VCN powergate status retrieval for SW SMU. Signed-off-by: Evan Quan Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 93cd969e5cf5..0685a3388e38 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -338,6 +338,10 @@ int smu_common_read_sensor(struct smu_context *smu, enum amd_pp_sensors sensor, *(uint32_t *)data = smu_feature_is_enabled(smu, SMU_FEATURE_DPM_VCE_BIT) ? 1 : 0; *size = 4; break; + case AMDGPU_PP_SENSOR_VCN_POWER_STATE: + *(uint32_t *)data = smu_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT) ? 1 : 0; + *size = 4; + break; default: ret = -EINVAL; break; From a3ebbdb95f8c343a547ee2abec4d8abbf71f8a94 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 22 Jul 2019 10:27:21 +0800 Subject: [PATCH 174/277] drm/amd/powerplay: correct Navi10 VCN powergate control (v2) No VCN DPM bit check as that's different from VCN PG. Also no extra check for possible double enablement/disablement as that's already done by VCN. v2: check return value of smu_feature_set_enabled Signed-off-by: Evan Quan Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 28 ++++++++-------------- 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index be592d22bdcc..cc0a3b2256af 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -578,28 +578,20 @@ static int navi10_set_default_dpm_table(struct smu_context *smu) static int navi10_dpm_set_uvd_enable(struct smu_context *smu, bool enable) { int ret = 0; - struct smu_power_context *smu_power = &smu->smu_power; - struct smu_power_gate *power_gate = &smu_power->power_gate; - if (enable && power_gate->uvd_gated) { - if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_UVD_BIT)) { - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_PowerUpVcn, 1); - if (ret) - return ret; - } - power_gate->uvd_gated = false; + if (enable) { + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_PowerUpVcn, 1); + if (ret) + return ret; } else { - if (!enable && !power_gate->uvd_gated) { - if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_UVD_BIT)) { - ret = smu_send_smc_msg(smu, SMU_MSG_PowerDownVcn); - if (ret) - return ret; - } - power_gate->uvd_gated = true; - } + ret = smu_send_smc_msg(smu, SMU_MSG_PowerDownVcn); + if (ret) + return ret; } - return 0; + ret = smu_feature_set_enabled(smu, SMU_FEATURE_VCN_PG_BIT, enable); + + return ret; } static int navi10_get_current_clk_freq_by_table(struct smu_context *smu, From 6dee4829cfde106a8af7d0d3ba23022f8f054761 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 22 Jul 2019 10:42:29 +0800 Subject: [PATCH 175/277] drm/amd/powerplay: correct UVD/VCE/VCN power status retrieval VCN should be used for Vega20 later ASICs while UVD and VCE are for previous ASICs. Signed-off-by: Evan Quan Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 56 +++++++++++++++++--------- 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 8c90baca07b2..2b546567853b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -3075,28 +3075,44 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_ENABLED_SMC_FEATURES_MASK, (void *)&value64, &size)) seq_printf(m, "SMC Feature Mask: 0x%016llx\n", value64); - /* UVD clocks */ - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_POWER, (void *)&value, &size)) { - if (!value) { - seq_printf(m, "UVD: Disabled\n"); - } else { - seq_printf(m, "UVD: Enabled\n"); - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, (void *)&value, &size)) - seq_printf(m, "\t%u MHz (DCLK)\n", value/100); - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, (void *)&value, &size)) - seq_printf(m, "\t%u MHz (VCLK)\n", value/100); + if (adev->asic_type > CHIP_VEGA20) { + /* VCN clocks */ + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCN_POWER_STATE, (void *)&value, &size)) { + if (!value) { + seq_printf(m, "VCN: Disabled\n"); + } else { + seq_printf(m, "VCN: Enabled\n"); + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, (void *)&value, &size)) + seq_printf(m, "\t%u MHz (DCLK)\n", value/100); + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, (void *)&value, &size)) + seq_printf(m, "\t%u MHz (VCLK)\n", value/100); + } } - } - seq_printf(m, "\n"); + seq_printf(m, "\n"); + } else { + /* UVD clocks */ + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_POWER, (void *)&value, &size)) { + if (!value) { + seq_printf(m, "UVD: Disabled\n"); + } else { + seq_printf(m, "UVD: Enabled\n"); + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, (void *)&value, &size)) + seq_printf(m, "\t%u MHz (DCLK)\n", value/100); + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, (void *)&value, &size)) + seq_printf(m, "\t%u MHz (VCLK)\n", value/100); + } + } + seq_printf(m, "\n"); - /* VCE clocks */ - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_POWER, (void *)&value, &size)) { - if (!value) { - seq_printf(m, "VCE: Disabled\n"); - } else { - seq_printf(m, "VCE: Enabled\n"); - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_ECCLK, (void *)&value, &size)) - seq_printf(m, "\t%u MHz (ECCLK)\n", value/100); + /* VCE clocks */ + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_POWER, (void *)&value, &size)) { + if (!value) { + seq_printf(m, "VCE: Disabled\n"); + } else { + seq_printf(m, "VCE: Enabled\n"); + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_ECCLK, (void *)&value, &size)) + seq_printf(m, "\t%u MHz (ECCLK)\n", value/100); + } } } From 7440ea8b2a4430eef5120d0a7faac6c39304ae6d Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 30 Jul 2019 14:37:04 +1000 Subject: [PATCH 176/277] drivers/macintosh/smu.c: Mark expected switch fall-through Mark switch cases where we are expecting to fall through. This patch fixes the following warning (Building: powerpc): drivers/macintosh/smu.c: In function 'smu_queue_i2c': drivers/macintosh/smu.c:854:21: warning: this statement may fall through [-Wimplicit-fallthrough=] cmd->info.devaddr &= 0xfe; ~~~~~~~~~~~~~~~~~~^~~~~~~ drivers/macintosh/smu.c:855:2: note: here case SMU_I2C_TRANSFER_STDSUB: ^~~~ Fixes: 0365ba7fb1fa ("[PATCH] ppc64: SMU driver update & i2c support") Signed-off-by: Stephen Rothwell Reviewed-by: Kees Cook Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190730143704.060a2606@canb.auug.org.au --- drivers/macintosh/smu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index 276065c888bc..23f1f41c8602 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -852,6 +852,7 @@ int smu_queue_i2c(struct smu_i2c_cmd *cmd) break; case SMU_I2C_TRANSFER_COMBINED: cmd->info.devaddr &= 0xfe; + /* fall through */ case SMU_I2C_TRANSFER_STDSUB: if (cmd->info.sublen > 3) return -EINVAL; From d7e23b887f67178c4f840781be7a6aa6aeb52ab1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 31 Jul 2019 06:01:42 +0000 Subject: [PATCH 177/277] powerpc/kasan: fix early boot failure on PPC32 Due to commit 4a6d8cf90017 ("powerpc/mm: don't use pte_alloc_kernel() until slab is available on PPC32"), pte_alloc_kernel() cannot be used during early KASAN init. Fix it by using memblock_alloc() instead. Fixes: 2edb16efc899 ("powerpc/32: Add KASAN support") Cc: stable@vger.kernel.org # v5.2+ Reported-by: Erhard F. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/da89670093651437f27d2975224712e0a130b055.1564552796.git.christophe.leroy@c-s.fr --- arch/powerpc/mm/kasan/kasan_init_32.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index 0d62be3cba47..74f4555a62ba 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -21,7 +21,7 @@ static void kasan_populate_pte(pte_t *ptep, pgprot_t prot) __set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 0); } -static int kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end) +static int __ref kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end) { pmd_t *pmd; unsigned long k_cur, k_next; @@ -35,7 +35,10 @@ static int kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_ if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte) continue; - new = pte_alloc_one_kernel(&init_mm); + if (slab_is_available()) + new = pte_alloc_one_kernel(&init_mm); + else + new = memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE); if (!new) return -ENOMEM; From 12d1402ce35a900b4273893b885ddf35dbea0571 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 31 Jul 2019 16:16:00 +0200 Subject: [PATCH 178/277] parisc: Mark expected switch fall-throughs in fault.c Fix a fall-through warning in fault.c. Fixes: a035d552a93b ("Makefile: Globally enable fall-through warning") Signed-off-by: Helge Deller --- arch/parisc/mm/fault.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 6dd4669ce7a5..adbd5e2144a3 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -66,6 +66,7 @@ parisc_acctyp(unsigned long code, unsigned int inst) case 0x30000000: /* coproc2 */ if (bit22set(inst)) return VM_WRITE; + /* fall through */ case 0x0: /* indexed/memory management */ if (bit22set(inst)) { From 73b886724747ea5fa599ada988fe0a30edcc2e00 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 31 Jul 2019 16:16:43 +0200 Subject: [PATCH 179/277] parisc: Fix fall-through warnings in fpudispatch.c In fpudispatch.c we see a lot of fall-through warnings, but for this file we prefer to not mark the switches and instead keep it in it's original state as it's copied from HP-UX. Fixes: a035d552a93b ("Makefile: Globally enable fall-through warning") Signed-off-by: Helge Deller --- arch/parisc/math-emu/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/parisc/math-emu/Makefile b/arch/parisc/math-emu/Makefile index b6c4b254901a..55c1396580a4 100644 --- a/arch/parisc/math-emu/Makefile +++ b/arch/parisc/math-emu/Makefile @@ -18,3 +18,4 @@ obj-y := frnd.o driver.o decode_exc.o fpudispatch.o denormal.o \ # other very old or stripped-down PA-RISC CPUs -- not currently supported obj-$(CONFIG_MATH_EMULATION) += unimplemented-math-emulation.o +CFLAGS_REMOVE_fpudispatch.o = -Wimplicit-fallthrough=3 From c5df04521b521f14c30de327aa1e880f1190a355 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 23 Jul 2019 18:47:51 +0900 Subject: [PATCH 180/277] parisc: rename default_defconfig to defconfig 'default_defconfig' is an awkward name since 'defconfig' is the default. Let's simply say 'defconfig' like other architectures. You can drop the KBUILD_DEFCONFIG define by following the standard naming. Signed-off-by: Masahiro Yamada Signed-off-by: Helge Deller --- arch/parisc/Makefile | 2 -- arch/parisc/configs/{default_defconfig => defconfig} | 0 2 files changed, 2 deletions(-) rename arch/parisc/configs/{default_defconfig => defconfig} (100%) diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 8acb8fa1f8d6..b10a1179291a 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -19,8 +19,6 @@ KBUILD_IMAGE := vmlinuz -KBUILD_DEFCONFIG := default_defconfig - NM = sh $(srctree)/arch/parisc/nm CHECKFLAGS += -D__hppa__=1 LIBGCC = $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) diff --git a/arch/parisc/configs/default_defconfig b/arch/parisc/configs/defconfig similarity index 100% rename from arch/parisc/configs/default_defconfig rename to arch/parisc/configs/defconfig From 740f05f30a8c49ec63668055d28feedd906d3c50 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 23 Jul 2019 22:37:54 +0200 Subject: [PATCH 181/277] parisc: fix race condition in patching code Assume the following ftrace code sequence that was patched in earlier by ftrace_make_call(): PAGE A: ffc: addr of ftrace_caller() PAGE B: 000: 0x6fc10080 /* stw,ma r1,40(sp) */ 004: 0x48213fd1 /* ldw -18(r1),r1 */ 008: 0xe820c002 /* bv,n r0(r1) */ 00c: 0xe83f1fdf /* b,l,n .-c,r1 */ When a Code sequences that is to be patched spans a page break, we might have already cleared the part on the PAGE A. If an interrupt is coming in during the remap of the fixed mapping to PAGE B, it might execute the patched function with only parts of the FTRACE code cleared. To prevent this, clear the jump to our mini trampoline first, and clear the remaining parts after this. This might also happen when patch_text() patches a function that it calls during remap. Signed-off-by: Sven Schnelle Cc: # 5.2+ Signed-off-by: Helge Deller --- arch/parisc/kernel/ftrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index d784ccdd8fef..b6fb30f2e4bf 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -181,8 +181,9 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, for (i = 0; i < ARRAY_SIZE(insn); i++) insn[i] = INSN_NOP; + __patch_text((void *)rec->ip, INSN_NOP); __patch_text_multiple((void *)rec->ip + 4 - sizeof(insn), - insn, sizeof(insn)); + insn, sizeof(insn)-4); return 0; } #endif From d0ee879187df966ef638031b5f5183078d672141 Mon Sep 17 00:00:00 2001 From: Jackie Liu Date: Wed, 31 Jul 2019 14:39:33 +0800 Subject: [PATCH 182/277] io_uring: fix KASAN use after free in io_sq_wq_submit_work [root@localhost ~]# ./liburing/test/link QEMU Standard PC report that: [ 29.379892] CPU: 0 PID: 84 Comm: kworker/u2:2 Not tainted 5.3.0-rc2-00051-g4010b622f1d2-dirty #86 [ 29.379902] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 [ 29.379913] Workqueue: io_ring-wq io_sq_wq_submit_work [ 29.379929] Call Trace: [ 29.379953] dump_stack+0xa9/0x10e [ 29.379970] ? io_sq_wq_submit_work+0xbf4/0xe90 [ 29.379986] print_address_description.cold.6+0x9/0x317 [ 29.379999] ? io_sq_wq_submit_work+0xbf4/0xe90 [ 29.380010] ? io_sq_wq_submit_work+0xbf4/0xe90 [ 29.380026] __kasan_report.cold.7+0x1a/0x34 [ 29.380044] ? io_sq_wq_submit_work+0xbf4/0xe90 [ 29.380061] kasan_report+0xe/0x12 [ 29.380076] io_sq_wq_submit_work+0xbf4/0xe90 [ 29.380104] ? io_sq_thread+0xaf0/0xaf0 [ 29.380152] process_one_work+0xb59/0x19e0 [ 29.380184] ? pwq_dec_nr_in_flight+0x2c0/0x2c0 [ 29.380221] worker_thread+0x8c/0xf40 [ 29.380248] ? __kthread_parkme+0xab/0x110 [ 29.380265] ? process_one_work+0x19e0/0x19e0 [ 29.380278] kthread+0x30b/0x3d0 [ 29.380292] ? kthread_create_on_node+0xe0/0xe0 [ 29.380311] ret_from_fork+0x3a/0x50 [ 29.380635] Allocated by task 209: [ 29.381255] save_stack+0x19/0x80 [ 29.381268] __kasan_kmalloc.constprop.6+0xc1/0xd0 [ 29.381279] kmem_cache_alloc+0xc0/0x240 [ 29.381289] io_submit_sqe+0x11bc/0x1c70 [ 29.381300] io_ring_submit+0x174/0x3c0 [ 29.381311] __x64_sys_io_uring_enter+0x601/0x780 [ 29.381322] do_syscall_64+0x9f/0x4d0 [ 29.381336] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 29.381633] Freed by task 84: [ 29.382186] save_stack+0x19/0x80 [ 29.382198] __kasan_slab_free+0x11d/0x160 [ 29.382210] kmem_cache_free+0x8c/0x2f0 [ 29.382220] io_put_req+0x22/0x30 [ 29.382230] io_sq_wq_submit_work+0x28b/0xe90 [ 29.382241] process_one_work+0xb59/0x19e0 [ 29.382251] worker_thread+0x8c/0xf40 [ 29.382262] kthread+0x30b/0x3d0 [ 29.382272] ret_from_fork+0x3a/0x50 [ 29.382569] The buggy address belongs to the object at ffff888067172140 which belongs to the cache io_kiocb of size 224 [ 29.384692] The buggy address is located 120 bytes inside of 224-byte region [ffff888067172140, ffff888067172220) [ 29.386723] The buggy address belongs to the page: [ 29.387575] page:ffffea00019c5c80 refcount:1 mapcount:0 mapping:ffff88806ace5180 index:0x0 [ 29.387587] flags: 0x100000000000200(slab) [ 29.387603] raw: 0100000000000200 dead000000000100 dead000000000122 ffff88806ace5180 [ 29.387617] raw: 0000000000000000 00000000800c000c 00000001ffffffff 0000000000000000 [ 29.387624] page dumped because: kasan: bad access detected [ 29.387920] Memory state around the buggy address: [ 29.388771] ffff888067172080: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc [ 29.390062] ffff888067172100: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb [ 29.391325] >ffff888067172180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 29.392578] ^ [ 29.393480] ffff888067172200: fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc [ 29.394744] ffff888067172280: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 29.396003] ================================================================== [ 29.397260] Disabling lock debugging due to kernel taint io_sq_wq_submit_work free and read req again. Cc: Zhengyuan Liu Cc: linux-block@vger.kernel.org Cc: stable@vger.kernel.org Fixes: f7b76ac9d17e ("io_uring: fix counter inc/dec mismatch in async_list") Signed-off-by: Jackie Liu Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 012bc0efb9d3..d542f1cf4428 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1838,6 +1838,7 @@ restart: do { struct sqe_submit *s = &req->submit; const struct io_uring_sqe *sqe = s->sqe; + unsigned int flags = req->flags; /* Ensure we clear previously set non-block flag */ req->rw.ki_flags &= ~IOCB_NOWAIT; @@ -1883,7 +1884,7 @@ restart: kfree(sqe); /* req from defer and link list needn't decrease async cnt */ - if (req->flags & (REQ_F_IO_DRAINED | REQ_F_LINK_DONE)) + if (flags & (REQ_F_IO_DRAINED | REQ_F_LINK_DONE)) goto out; if (!async_list) From 090bb803708198e5ab6b0046398c7ed9f4d12d6b Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 31 Jul 2019 14:26:51 +0200 Subject: [PATCH 183/277] ata: libahci: do not complain in case of deferred probe Retrieving PHYs can defer the probe, do not spawn an error when -EPROBE_DEFER is returned, it is normal behavior. Fixes: b1a9edbda040 ("ata: libahci: allow to use multiple PHYs") Reviewed-by: Hans de Goede Signed-off-by: Miquel Raynal Signed-off-by: Jens Axboe --- drivers/ata/libahci_platform.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index 3a36e76eca83..9e9583a6bba9 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -338,6 +338,9 @@ static int ahci_platform_get_phy(struct ahci_host_priv *hpriv, u32 port, hpriv->phys[port] = NULL; rc = 0; break; + case -EPROBE_DEFER: + /* Do not complain yet */ + break; default: dev_err(dev, From 2b5c8f0063e4b263cf2de82029798183cf85c320 Mon Sep 17 00:00:00 2001 From: Munehisa Kamata Date: Wed, 31 Jul 2019 20:13:10 +0800 Subject: [PATCH 184/277] nbd: replace kill_bdev() with __invalidate_device() again Commit abbbdf12497d ("replace kill_bdev() with __invalidate_device()") once did this, but 29eaadc03649 ("nbd: stop using the bdev everywhere") resurrected kill_bdev() and it has been there since then. So buffer_head mappings still get killed on a server disconnection, and we can still hit the BUG_ON on a filesystem on the top of the nbd device. EXT4-fs (nbd0): mounted filesystem with ordered data mode. Opts: (null) block nbd0: Receive control failed (result -32) block nbd0: shutting down sockets print_req_error: I/O error, dev nbd0, sector 66264 flags 3000 EXT4-fs warning (device nbd0): htree_dirblock_to_tree:979: inode #2: lblock 0: comm ls: error -5 reading directory block print_req_error: I/O error, dev nbd0, sector 2264 flags 3000 EXT4-fs error (device nbd0): __ext4_get_inode_loc:4690: inode #2: block 283: comm ls: unable to read itable block EXT4-fs error (device nbd0) in ext4_reserve_inode_write:5894: IO failure ------------[ cut here ]------------ kernel BUG at fs/buffer.c:3057! invalid opcode: 0000 [#1] SMP PTI CPU: 7 PID: 40045 Comm: jbd2/nbd0-8 Not tainted 5.1.0-rc3+ #4 Hardware name: Amazon EC2 m5.12xlarge/, BIOS 1.0 10/16/2017 RIP: 0010:submit_bh_wbc+0x18b/0x190 ... Call Trace: jbd2_write_superblock+0xf1/0x230 [jbd2] ? account_entity_enqueue+0xc5/0xf0 jbd2_journal_update_sb_log_tail+0x94/0xe0 [jbd2] jbd2_journal_commit_transaction+0x12f/0x1d20 [jbd2] ? __switch_to_asm+0x40/0x70 ... ? lock_timer_base+0x67/0x80 kjournald2+0x121/0x360 [jbd2] ? remove_wait_queue+0x60/0x60 kthread+0xf8/0x130 ? commit_timeout+0x10/0x10 [jbd2] ? kthread_bind+0x10/0x10 ret_from_fork+0x35/0x40 With __invalidate_device(), I no longer hit the BUG_ON with sync or unmount on the disconnected device. Fixes: 29eaadc03649 ("nbd: stop using the bdev everywhere") Cc: linux-block@vger.kernel.org Cc: Ratna Manoj Bolla Cc: nbd@other.debian.org Cc: stable@vger.kernel.org Cc: David Woodhouse Reviewed-by: Josef Bacik Signed-off-by: Munehisa Kamata Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 9bcde2325893..e21d2ded732b 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1231,7 +1231,7 @@ static void nbd_clear_sock_ioctl(struct nbd_device *nbd, struct block_device *bdev) { sock_shutdown(nbd); - kill_bdev(bdev); + __invalidate_device(bdev, true); nbd_bdev_reset(bdev); if (test_and_clear_bit(NBD_HAS_CONFIG_REF, &nbd->config->runtime_flags)) From dc25ace66c74ca148c393952bd2ce0856029c692 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 26 Jul 2019 22:25:20 +0200 Subject: [PATCH 185/277] drm/i810: Use CONFIG_PREEMPTION CONFIG_PREEMPTION is selected by CONFIG_PREEMPT and by CONFIG_PREEMPT_RT. Both PREEMPT and PREEMPT_RT require the same functionality which today depends on CONFIG_PREEMPT. Change the Kconfig dependency of i810 to !CONFIG_PREEMPTION so the driver is not accidentally built on a RT kernel. Signed-off-by: Thomas Gleixner Cc: dri-devel@lists.freedesktop.org Cc: Maarten Lankhorst Cc: David Airlie Cc: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/alpine.DEB.2.21.1907262223280.1791@nanos.tec.linutronix.de --- drivers/gpu/drm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 1d80222587ad..3c88420e3497 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -394,7 +394,7 @@ config DRM_R128 config DRM_I810 tristate "Intel I810" # !PREEMPT because of missing ioctl locking - depends on DRM && AGP && AGP_INTEL && (!PREEMPT || BROKEN) + depends on DRM && AGP && AGP_INTEL && (!PREEMPTION || BROKEN) help Choose this option if you have an Intel I810 graphics card. If M is selected, the module will be called i810. AGP support is required From 944cfe9be1fbbec73bab2f7e77fe2e8f9c72970f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 31 Jul 2019 00:58:59 +0900 Subject: [PATCH 186/277] kbuild: modpost: include .*.cmd files only when targets exist If a build rule fails, the .DELETE_ON_ERROR special target removes the target, but does nothing for the .*.cmd file, which might be corrupted. So, .*.cmd files should be included only when the corresponding targets exist. Commit 392885ee82d3 ("kbuild: let fixdep directly write to .*.cmd files") missed to fix up this file. Fixes: 392885ee82d3 ("kbuild: let fixdep directly write to .*.cmd") Cc: # v5.0+ Signed-off-by: Masahiro Yamada --- scripts/Makefile.modpost | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index 6b19c1a4eae5..ad4b9829a456 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -145,10 +145,8 @@ FORCE: # optimization, we don't need to read them if the target does not # exist, we will rebuild anyway in that case. -cmd_files := $(wildcard $(foreach f,$(sort $(targets)),$(dir $(f)).$(notdir $(f)).cmd)) +existing-targets := $(wildcard $(sort $(targets))) -ifneq ($(cmd_files),) - include $(cmd_files) -endif +-include $(foreach f,$(existing-targets),$(dir $(f)).$(notdir $(f)).cmd) .PHONY: $(PHONY) From cb4819934a7f9b87876f11ed05b8624c0114551b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 31 Jul 2019 00:59:00 +0900 Subject: [PATCH 187/277] kbuild: modpost: handle KBUILD_EXTRA_SYMBOLS only for external modules KBUILD_EXTRA_SYMBOLS makes sense only when building external modules. Moreover, the modpost sets 'external_module' if the -e option is given. I replaced $(patsubst %, -e %,...) with simpler $(addprefix -e,...) while I was here. Signed-off-by: Masahiro Yamada --- scripts/Makefile.modpost | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index ad4b9829a456..c856512349cd 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -78,7 +78,7 @@ modpost = scripts/mod/modpost \ $(if $(CONFIG_MODULE_SRCVERSION_ALL),-a,) \ $(if $(KBUILD_EXTMOD),-i,-o) $(kernelsymfile) \ $(if $(KBUILD_EXTMOD),-I $(modulesymfile)) \ - $(if $(KBUILD_EXTRA_SYMBOLS), $(patsubst %, -e %,$(KBUILD_EXTRA_SYMBOLS))) \ + $(if $(KBUILD_EXTMOD),$(addprefix -e ,$(KBUILD_EXTRA_SYMBOLS))) \ $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ $(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E) \ $(if $(KBUILD_MODPOST_WARN),-w) From acf2a1397a686365775385ed4657941119172263 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 31 Jul 2019 00:59:01 +0900 Subject: [PATCH 188/277] kbuild: modpost: remove unnecessary dependency for __modpost __modpost is a phony target. The dependency on FORCE is pointless. All the objects have been built in the previous stage, so the dependency on the objects are not necessary either. Count the number of modules in a more straightforward way. Signed-off-by: Masahiro Yamada --- scripts/Makefile.modpost | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index c856512349cd..fdab32d6f552 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -86,11 +86,11 @@ modpost = scripts/mod/modpost \ MODPOST_OPT=$(subst -i,-n,$(filter -i,$(MAKEFLAGS))) # We can go over command line length here, so be careful. -quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules +quiet_cmd_modpost = MODPOST $(words $(modules)) modules cmd_modpost = sed 's/ko$$/o/' $(modorder) | $(modpost) $(MODPOST_OPT) -s -T - PHONY += __modpost -__modpost: $(modules:.ko=.o) FORCE +__modpost: $(call cmd,modpost) $(wildcard vmlinux) quiet_cmd_kernel-mod = MODPOST $@ From a721588d9475cbbf9e8b3ae1a69b1dea88d01653 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 31 Jul 2019 00:59:02 +0900 Subject: [PATCH 189/277] kbuild: modpost: do not parse unnecessary rules for vmlinux modpost Since commit ff9b45c55b26 ("kbuild: modpost: read modules.order instead of $(MODVERDIR)/*.mod"), 'make vmlinux' emits a warning, like this: $ make defconfig vmlinux [ snip ] LD vmlinux.o cat: modules.order: No such file or directory MODPOST vmlinux.o MODINFO modules.builtin.modinfo KSYM .tmp_kallsyms1.o KSYM .tmp_kallsyms2.o LD vmlinux SORTEX vmlinux SYSMAP System.map When building only vmlinux, KBUILD_MODULES is not set. Hence, the modules.order is not generated. For the vmlinux modpost, it is not necessary at all. Separate scripts/Makefile.modpost for the vmlinux/modules stages. This works more efficiently because the vmlinux modpost does not need to include .*.cmd files. Fixes: ff9b45c55b26 ("kbuild: modpost: read modules.order instead of $(MODVERDIR)/*.mod") Signed-off-by: Masahiro Yamada --- scripts/Makefile.modpost | 74 ++++++++++++++++++++++------------------ scripts/link-vmlinux.sh | 2 +- 2 files changed, 41 insertions(+), 35 deletions(-) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index fdab32d6f552..92ed02d7cd5e 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -38,12 +38,39 @@ # symbols in the final module linking stage # KBUILD_MODPOST_NOFINAL can be set to skip the final link of modules. # This is solely useful to speed up test compiles -PHONY := _modpost -_modpost: __modpost + +PHONY := __modpost +__modpost: include include/config/auto.conf include scripts/Kbuild.include +kernelsymfile := $(objtree)/Module.symvers +modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers + +MODPOST = scripts/mod/modpost \ + $(if $(CONFIG_MODVERSIONS),-m) \ + $(if $(CONFIG_MODULE_SRCVERSION_ALL),-a) \ + $(if $(KBUILD_EXTMOD),-i,-o) $(kernelsymfile) \ + $(if $(KBUILD_EXTMOD),-I $(modulesymfile)) \ + $(if $(KBUILD_EXTMOD),$(addprefix -e ,$(KBUILD_EXTRA_SYMBOLS))) \ + $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ + $(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E) \ + $(if $(KBUILD_MODPOST_WARN),-w) + +ifdef MODPOST_VMLINUX + +__modpost: vmlinux.o + +quiet_cmd_modpost = MODPOST $@ + cmd_modpost = $(MODPOST) $@ + +PHONY += vmlinux.o +vmlinux.o: + $(call cmd,modpost) + +else + # When building external modules load the Kbuild file to retrieve EXTRA_SYMBOLS info ifneq ($(KBUILD_EXTMOD),) @@ -58,50 +85,27 @@ endif include scripts/Makefile.lib -kernelsymfile := $(objtree)/Module.symvers -modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers - modorder := $(if $(KBUILD_EXTMOD),$(KBUILD_EXTMOD)/)modules.order -# Step 1), find all modules listed in modules.order -ifdef CONFIG_MODULES +# find all modules listed in modules.order modules := $(sort $(shell cat $(modorder))) -endif # Stop after building .o files if NOFINAL is set. Makes compile tests quicker -_modpost: $(if $(KBUILD_MODPOST_NOFINAL), $(modules:.ko:.o),$(modules)) +__modpost: $(if $(KBUILD_MODPOST_NOFINAL), $(modules:.ko:.o),$(modules)) + @: -# Step 2), invoke modpost -# Includes step 3,4 -modpost = scripts/mod/modpost \ - $(if $(CONFIG_MODVERSIONS),-m) \ - $(if $(CONFIG_MODULE_SRCVERSION_ALL),-a,) \ - $(if $(KBUILD_EXTMOD),-i,-o) $(kernelsymfile) \ - $(if $(KBUILD_EXTMOD),-I $(modulesymfile)) \ - $(if $(KBUILD_EXTMOD),$(addprefix -e ,$(KBUILD_EXTRA_SYMBOLS))) \ - $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ - $(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E) \ - $(if $(KBUILD_MODPOST_WARN),-w) - -MODPOST_OPT=$(subst -i,-n,$(filter -i,$(MAKEFLAGS))) +MODPOST += $(subst -i,-n,$(filter -i,$(MAKEFLAGS))) -s -T - $(wildcard vmlinux) # We can go over command line length here, so be careful. quiet_cmd_modpost = MODPOST $(words $(modules)) modules - cmd_modpost = sed 's/ko$$/o/' $(modorder) | $(modpost) $(MODPOST_OPT) -s -T - + cmd_modpost = sed 's/ko$$/o/' $(modorder) | $(MODPOST) -PHONY += __modpost -__modpost: - $(call cmd,modpost) $(wildcard vmlinux) - -quiet_cmd_kernel-mod = MODPOST $@ - cmd_kernel-mod = $(modpost) $@ - -vmlinux.o: FORCE - $(call cmd,kernel-mod) +PHONY += modules-modpost +modules-modpost: + $(call cmd,modpost) # Declare generated files as targets for modpost -$(modules:.ko=.mod.c): __modpost ; - +$(modules:.ko=.mod.c): modules-modpost # Step 5), compile all *.mod.c files @@ -149,4 +153,6 @@ existing-targets := $(wildcard $(sort $(targets))) -include $(foreach f,$(existing-targets),$(dir $(f)).$(notdir $(f)).cmd) +endif + .PHONY: $(PHONY) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index a7124f895b24..915775eb2921 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -210,7 +210,7 @@ info LD vmlinux.o modpost_link vmlinux.o # modpost vmlinux.o to check for section mismatches -${MAKE} -f "${srctree}/scripts/Makefile.modpost" vmlinux.o +${MAKE} -f "${srctree}/scripts/Makefile.modpost" MODPOST_VMLINUX=1 info MODINFO modules.builtin.modinfo ${OBJCOPY} -j .modinfo -O binary vmlinux.o modules.builtin.modinfo From e2a280d28d32d2cf7eaa2b1cecefd079b24c0245 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 31 Jul 2019 01:40:42 +0900 Subject: [PATCH 190/277] lib/raid6: fix unnecessary rebuild of vpermxor*.c The following four files are every time rebuilt: UNROLL lib/raid6/vpermxor1.c UNROLL lib/raid6/vpermxor2.c UNROLL lib/raid6/vpermxor4.c UNROLL lib/raid6/vpermxor8.c Fix the suffixes in the targets. Fixes: 72ad21075df8 ("lib/raid6: refactor unroll rules with pattern rules") Signed-off-by: Masahiro Yamada --- lib/raid6/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 42695bc8d451..0083b5cc646c 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -66,7 +66,7 @@ CFLAGS_vpermxor1.o += $(altivec_flags) CFLAGS_vpermxor2.o += $(altivec_flags) CFLAGS_vpermxor4.o += $(altivec_flags) CFLAGS_vpermxor8.o += $(altivec_flags) -targets += vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o +targets += vpermxor1.c vpermxor2.c vpermxor4.c vpermxor8.c $(obj)/vpermxor%.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE $(call if_changed,unroll) From e8de12fb7cde2c85bc31097cd098da79a4818305 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 30 Jul 2019 09:48:03 -0700 Subject: [PATCH 191/277] kbuild: Check for unknown options with cc-option usage in Kconfig and clang If the particular version of clang a user has doesn't enable -Werror=unknown-warning-option by default, even though it is the default[1], then make sure to pass the option to the Kconfig cc-option command so that testing options from Kconfig files works properly. Otherwise, depending on the default values setup in the clang toolchain we will silently assume options such as -Wmaybe-uninitialized are supported by clang, when they really aren't. A compilation issue only started happening for me once commit 589834b3a009 ("kbuild: Add -Werror=unknown-warning-option to CLANG_FLAGS") was applied on top of commit b303c6df80c9 ("kbuild: compute false-positive -Wmaybe-uninitialized cases in Kconfig"). This leads kbuild to try and test for the existence of the -Wmaybe-uninitialized flag with the cc-option command in scripts/Kconfig.include, and it doesn't see an error returned from the option test so it sets the config value to Y. Then the Makefile tries to pass the unknown option on the command line and -Werror=unknown-warning-option catches the invalid option and breaks the build. Before commit 589834b3a009 ("kbuild: Add -Werror=unknown-warning-option to CLANG_FLAGS") the build works fine, but any cc-option test of a warning option in Kconfig files silently evaluates to true, even if the warning option flag isn't supported on clang. Note: This doesn't change cc-option usages in Makefiles because those use a different rule that includes KBUILD_CFLAGS by default (see the __cc-option command in scripts/Kbuild.incluide). The KBUILD_CFLAGS variable already has the -Werror=unknown-warning-option flag set. Thanks to Doug for pointing out the different rule. [1] https://clang.llvm.org/docs/DiagnosticsReference.html#wunknown-warning-option Cc: Peter Smith Cc: Nick Desaulniers Cc: Douglas Anderson Signed-off-by: Stephen Boyd Reviewed-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- scripts/Kconfig.include | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Kconfig.include b/scripts/Kconfig.include index 8a5c4d645eb1..4bbf4fc163a2 100644 --- a/scripts/Kconfig.include +++ b/scripts/Kconfig.include @@ -25,7 +25,7 @@ failure = $(if-success,$(1),n,y) # $(cc-option,) # Return y if the compiler supports , n otherwise -cc-option = $(success,$(CC) -Werror $(1) -E -x c /dev/null -o /dev/null) +cc-option = $(success,$(CC) -Werror $(CLANG_FLAGS) $(1) -E -x c /dev/null -o /dev/null) # $(ld-option,) # Return y if the linker supports , n otherwise From 3d0b63c5dfa8a912ac2d5026a24826b99b20ecc9 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Wed, 31 Jul 2019 08:53:42 -0600 Subject: [PATCH 192/277] MAINTAINERS: floppy: take over maintainership I would like to maintain the floppy driver. After the recent fixes, I think I know the code pretty well. Nowadays I've got 2 physical 3.5" readers to test all the changes. Signed-off-by: Denis Efremov Acked-by: Will Deacon Signed-off-by: Jens Axboe --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6426db5198f0..6c49b48cfd69 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6322,7 +6322,8 @@ F: Documentation/devicetree/bindings/counter/ftm-quaddec.txt F: drivers/counter/ftm-quaddec.c FLOPPY DRIVER -S: Orphan +M: Denis Efremov +S: Odd Fixes L: linux-block@vger.kernel.org F: drivers/block/floppy.c From c2c44ec20a8496f7a3b3401c092afe96908eced1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 27 Jul 2019 08:29:57 -0400 Subject: [PATCH 193/277] Unbreak mount_capable() In "consolidate the capability checks in sget_{fc,userns}())" the wrong argument had been passed to mount_capable() by sget_fc(). That mistake had been further obscured later, when switching mount_capable() to fs_context has moved the calculation of bogus argument from sget_fc() to mount_capable() itself. It should've been fc->user_ns all along. Screwed-up-by: Al Viro Reported-by: Christian Brauner Tested-by: Christian Brauner Reviewed-by: David Howells Signed-off-by: Al Viro --- fs/super.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/super.c b/fs/super.c index 113c58f19425..5960578a4076 100644 --- a/fs/super.c +++ b/fs/super.c @@ -478,13 +478,10 @@ EXPORT_SYMBOL(generic_shutdown_super); bool mount_capable(struct fs_context *fc) { - struct user_namespace *user_ns = fc->global ? &init_user_ns - : fc->user_ns; - if (!(fc->fs_type->fs_flags & FS_USERNS_MOUNT)) return capable(CAP_SYS_ADMIN); else - return ns_capable(user_ns, CAP_SYS_ADMIN); + return ns_capable(fc->user_ns, CAP_SYS_ADMIN); } /** From 706cb5492c8c459199fa0ab3b5fd2ba54ee53b0c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 27 Jul 2019 17:12:54 +0200 Subject: [PATCH 194/277] gfs2: Inode dirtying fix With the recent iomap write page reclaim deadlock fix, it turns out that the GLF_DIRTY flag isn't always set when it needs to be anymore: previously, this happened as a side effect of always adding the inode buffer head to the current transaction with gfs2_trans_add_meta, but this isn't happening consistently anymore. Fix by removing an additional unnecessary gfs2_trans_add_meta call and by setting the GLF_DIRTY flag in gfs2_iomap_end. (The GLF_DIRTY flag causes inode_go_sync to flush the transaction log when syncing out the glock of that inode. When the flag isn't set, inode_go_sync will skip inodes, including ones with an i_state of I_DIRTY_PAGES, which will lead to cluster incoherency.) In addition, in gfs2_iomap_page_done, if the metadata has changed, mark the inode as I_DIRTY_DATASYNC to have the inode added to the current transaction: we don't expect metadata to change here, but let's err on the safe side. Fixes: d0a22a4b03b8 ("gfs2: Fix iomap write page reclaim deadlock"); Signed-off-by: Andreas Gruenbacher --- fs/gfs2/bmap.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 79581b9bdebb..4df26ef2b2b1 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1002,11 +1002,16 @@ static void gfs2_iomap_page_done(struct inode *inode, loff_t pos, unsigned copied, struct page *page, struct iomap *iomap) { + struct gfs2_trans *tr = current->journal_info; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); if (page && !gfs2_is_stuffed(ip)) gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied); + + if (tr->tr_num_buf_new) + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + gfs2_trans_end(sdp); } @@ -1099,8 +1104,6 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, tr = current->journal_info; if (tr->tr_num_buf_new) __mark_inode_dirty(inode, I_DIRTY_DATASYNC); - else - gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[0]); gfs2_trans_end(sdp); } @@ -1181,10 +1184,16 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, if (ip->i_qadata && ip->i_qadata->qa_qd_num) gfs2_quota_unlock(ip); + + if (unlikely(!written)) + goto out_unlock; + if (iomap->flags & IOMAP_F_SIZE_CHANGED) mark_inode_dirty(inode); - gfs2_write_unlock(inode); + set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); +out_unlock: + gfs2_write_unlock(inode); out: return 0; } From a22c5cf5d37ddcd4d2dd98ee9cf04fa5dd1e1c01 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 29 Jul 2019 13:54:21 +0100 Subject: [PATCH 195/277] arm64: vdso: Fix Makefile regression Using an old .config in combination with "make oldconfig" can cause an incorrect detection of the compat compiler: $ grep CROSS_COMPILE_COMPAT .config CONFIG_CROSS_COMPILE_COMPAT_VDSO="" $ make oldconfig && make arch/arm64/Makefile:58: gcc not found, check CROSS_COMPILE_COMPAT. Stop. Accordingly to the section 7.2 of the GNU Make manual "Syntax of Conditionals", "When the value results from complex expansions of variables and functions, expansions you would consider empty may actually contain whitespace characters and thus are not seen as empty. However, you can use the strip function to avoid interpreting whitespace as a non-empty value." Fix the issue adding strip to the CROSS_COMPILE_COMPAT string evaluation. Reported-by: Matteo Croce Tested-by: Matteo Croce Acked-by: Will Deacon Signed-off-by: Vincenzo Frascino Signed-off-by: Catalin Marinas --- arch/arm64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index bb1f1dbb34e8..61de992bbea3 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -52,7 +52,7 @@ ifeq ($(CONFIG_GENERIC_COMPAT_VDSO), y) ifeq ($(CONFIG_CC_IS_CLANG), y) $(warning CROSS_COMPILE_COMPAT is clang, the compat vDSO will not be built) - else ifeq ($(CROSS_COMPILE_COMPAT),) + else ifeq ($(strip $(CROSS_COMPILE_COMPAT)),) $(warning CROSS_COMPILE_COMPAT not defined or empty, the compat vDSO will not be built) else ifeq ($(shell which $(CROSS_COMPILE_COMPAT)gcc 2> /dev/null),) $(error $(CROSS_COMPILE_COMPAT)gcc not found, check CROSS_COMPILE_COMPAT) From 147b9635e6347104b91f48ca9dca61eb0fbf2a54 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 30 Jul 2019 15:40:20 +0100 Subject: [PATCH 196/277] arm64: cpufeature: Fix feature comparison for CTR_EL0.{CWG,ERG} If CTR_EL0.{CWG,ERG} are 0b0000 then they must be interpreted to have their architecturally maximum values, which defeats the use of FTR_HIGHER_SAFE when sanitising CPU ID registers on heterogeneous machines. Introduce FTR_HIGHER_OR_ZERO_SAFE so that these fields effectively saturate at zero. Fixes: 3c739b571084 ("arm64: Keep track of CPU feature registers") Cc: # 4.4.x- Reviewed-by: Suzuki K Poulose Acked-by: Mark Rutland Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 7 ++++--- arch/arm64/kernel/cpufeature.c | 8 ++++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 407e2bf23676..c96ffa4722d3 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -35,9 +35,10 @@ */ enum ftr_type { - FTR_EXACT, /* Use a predefined safe value */ - FTR_LOWER_SAFE, /* Smaller value is safe */ - FTR_HIGHER_SAFE,/* Bigger value is safe */ + FTR_EXACT, /* Use a predefined safe value */ + FTR_LOWER_SAFE, /* Smaller value is safe */ + FTR_HIGHER_SAFE, /* Bigger value is safe */ + FTR_HIGHER_OR_ZERO_SAFE, /* Bigger value is safe, but 0 is biggest */ }; #define FTR_STRICT true /* SANITY check strict matching required */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f29f36a65175..d19d14ba9ae4 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -225,8 +225,8 @@ static const struct arm64_ftr_bits ftr_ctr[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DIC_SHIFT, 1, 1), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IDC_SHIFT, 1, 1), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, CTR_CWG_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, CTR_ERG_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_CWG_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_ERG_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DMINLINE_SHIFT, 4, 1), /* * Linux can handle differing I-cache policies. Userspace JITs will @@ -468,6 +468,10 @@ static s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, case FTR_LOWER_SAFE: ret = new < cur ? new : cur; break; + case FTR_HIGHER_OR_ZERO_SAFE: + if (!cur || !new) + break; + /* Fallthrough */ case FTR_HIGHER_SAFE: ret = new > cur ? new : cur; break; From f1d4836201543e88ebe70237e67938168d5fab19 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Tue, 30 Jul 2019 17:23:48 -0400 Subject: [PATCH 197/277] arm64/efi: fix variable 'si' set but not used GCC throws out this warning on arm64. drivers/firmware/efi/libstub/arm-stub.c: In function 'efi_entry': drivers/firmware/efi/libstub/arm-stub.c:132:22: warning: variable 'si' set but not used [-Wunused-but-set-variable] Fix it by making free_screen_info() a static inline function. Acked-by: Will Deacon Signed-off-by: Qian Cai Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/efi.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 8e79ce9c3f5c..76a144702586 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -105,7 +105,11 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base, ((protocol##_t *)instance)->f(instance, ##__VA_ARGS__) #define alloc_screen_info(x...) &screen_info -#define free_screen_info(x...) + +static inline void free_screen_info(efi_system_table_t *sys_table_arg, + struct screen_info *si) +{ +} /* redeclare as 'hidden' so the compiler will generate relative references */ extern struct screen_info screen_info __attribute__((__visibility__("hidden"))); From 7e9e5ead55beacc11116b3fb90b0de6e7cf55a69 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 17 Jul 2019 14:15:37 -0700 Subject: [PATCH 198/277] drm/vgem: fix cache synchronization on arm/arm64 drm_cflush_pages() is no-op on arm/arm64. But instead we can use dma_sync API. Fixes failures w/ vgem_test. Acked-by: Daniel Vetter Signed-off-by: Rob Clark Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20190717211542.30482-1-robdclark@gmail.com --- drivers/gpu/drm/vgem/vgem_drv.c | 130 ++++++++++++++++++++------------ 1 file changed, 83 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index 11a8f99ba18c..fc04803ff403 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c @@ -47,10 +47,16 @@ static struct vgem_device { struct platform_device *platform; } *vgem_device; +static void sync_and_unpin(struct drm_vgem_gem_object *bo); +static struct page **pin_and_sync(struct drm_vgem_gem_object *bo); + static void vgem_gem_free_object(struct drm_gem_object *obj) { struct drm_vgem_gem_object *vgem_obj = to_vgem_bo(obj); + if (!obj->import_attach) + sync_and_unpin(vgem_obj); + kvfree(vgem_obj->pages); mutex_destroy(&vgem_obj->pages_lock); @@ -78,40 +84,15 @@ static vm_fault_t vgem_gem_fault(struct vm_fault *vmf) return VM_FAULT_SIGBUS; mutex_lock(&obj->pages_lock); + if (!obj->pages) + pin_and_sync(obj); if (obj->pages) { get_page(obj->pages[page_offset]); vmf->page = obj->pages[page_offset]; ret = 0; } mutex_unlock(&obj->pages_lock); - if (ret) { - struct page *page; - page = shmem_read_mapping_page( - file_inode(obj->base.filp)->i_mapping, - page_offset); - if (!IS_ERR(page)) { - vmf->page = page; - ret = 0; - } else switch (PTR_ERR(page)) { - case -ENOSPC: - case -ENOMEM: - ret = VM_FAULT_OOM; - break; - case -EBUSY: - ret = VM_FAULT_RETRY; - break; - case -EFAULT: - case -EINVAL: - ret = VM_FAULT_SIGBUS; - break; - default: - WARN_ON(PTR_ERR(page)); - ret = VM_FAULT_SIGBUS; - break; - } - - } return ret; } @@ -277,32 +258,93 @@ static const struct file_operations vgem_driver_fops = { .release = drm_release, }; +/* Called under pages_lock, except in free path (where it can't race): */ +static void sync_and_unpin(struct drm_vgem_gem_object *bo) +{ + struct drm_device *dev = bo->base.dev; + + if (bo->table) { + dma_sync_sg_for_cpu(dev->dev, bo->table->sgl, + bo->table->nents, DMA_BIDIRECTIONAL); + sg_free_table(bo->table); + kfree(bo->table); + bo->table = NULL; + } + + if (bo->pages) { + drm_gem_put_pages(&bo->base, bo->pages, true, true); + bo->pages = NULL; + } +} + +static struct page **pin_and_sync(struct drm_vgem_gem_object *bo) +{ + struct drm_device *dev = bo->base.dev; + int npages = bo->base.size >> PAGE_SHIFT; + struct page **pages; + struct sg_table *sgt; + + WARN_ON(!mutex_is_locked(&bo->pages_lock)); + + pages = drm_gem_get_pages(&bo->base); + if (IS_ERR(pages)) { + bo->pages_pin_count--; + mutex_unlock(&bo->pages_lock); + return pages; + } + + sgt = drm_prime_pages_to_sg(pages, npages); + if (IS_ERR(sgt)) { + dev_err(dev->dev, + "failed to allocate sgt: %ld\n", + PTR_ERR(bo->table)); + drm_gem_put_pages(&bo->base, pages, false, false); + mutex_unlock(&bo->pages_lock); + return ERR_CAST(bo->table); + } + + /* + * Flush the object from the CPU cache so that importers + * can rely on coherent indirect access via the exported + * dma-address. + */ + dma_sync_sg_for_device(dev->dev, sgt->sgl, + sgt->nents, DMA_BIDIRECTIONAL); + + bo->pages = pages; + bo->table = sgt; + + return pages; +} + static struct page **vgem_pin_pages(struct drm_vgem_gem_object *bo) { + struct page **pages; + mutex_lock(&bo->pages_lock); - if (bo->pages_pin_count++ == 0) { - struct page **pages; - - pages = drm_gem_get_pages(&bo->base); - if (IS_ERR(pages)) { - bo->pages_pin_count--; - mutex_unlock(&bo->pages_lock); - return pages; - } - - bo->pages = pages; + if (bo->pages_pin_count++ == 0 && !bo->pages) { + pages = pin_and_sync(bo); + } else { + WARN_ON(!bo->pages); + pages = bo->pages; } mutex_unlock(&bo->pages_lock); - return bo->pages; + return pages; } static void vgem_unpin_pages(struct drm_vgem_gem_object *bo) { + /* + * We shouldn't hit this for imported bo's.. in the import + * case we don't own the scatter-table + */ + WARN_ON(bo->base.import_attach); + mutex_lock(&bo->pages_lock); if (--bo->pages_pin_count == 0) { - drm_gem_put_pages(&bo->base, bo->pages, true, true); - bo->pages = NULL; + WARN_ON(!bo->table); + sync_and_unpin(bo); } mutex_unlock(&bo->pages_lock); } @@ -310,18 +352,12 @@ static void vgem_unpin_pages(struct drm_vgem_gem_object *bo) static int vgem_prime_pin(struct drm_gem_object *obj) { struct drm_vgem_gem_object *bo = to_vgem_bo(obj); - long n_pages = obj->size >> PAGE_SHIFT; struct page **pages; pages = vgem_pin_pages(bo); if (IS_ERR(pages)) return PTR_ERR(pages); - /* Flush the object from the CPU cache so that importers can rely - * on coherent indirect access via the exported dma-address. - */ - drm_clflush_pages(pages, n_pages); - return 0; } From b399abe7c21e248dc6224cadc9a378a2beb10cfd Mon Sep 17 00:00:00 2001 From: Mao Han Date: Thu, 11 Jul 2019 10:38:40 +0800 Subject: [PATCH 199/277] riscv: Fix perf record without libelf support This patch fix following perf record error by linking vdso.so with build id. perf.data perf.data.old [ perf record: Woken up 1 times to write data ] free(): double free detected in tcache 2 Aborted perf record use filename__read_build_id(util/symbol-minimal.c) to get build id when libelf is not supported. When vdso.so is linked without build id, the section size of PT_NOTE will be zero, buf size will realloc to zero and cause memory corruption. Signed-off-by: Mao Han Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Signed-off-by: Paul Walmsley --- arch/riscv/kernel/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index f1d6ffe43e42..49a5852fd07d 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -37,7 +37,7 @@ $(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) FORCE # these symbols in the kernel code rather than hand-coded addresses. SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \ - -Wl,--hash-style=both + -Wl,--build-id -Wl,--hash-style=both $(obj)/vdso-dummy.o: $(src)/vdso.lds $(obj)/rt_sigreturn.o FORCE $(call if_changed,vdsold) From 11ae2d892139a1086f257188d457ddcb71ab5257 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Thu, 25 Jul 2019 13:41:31 -0700 Subject: [PATCH 200/277] riscv: dts: fu540-c000: drop "timebase-frequency" On FU540-based systems, the "timebase-frequency" (RTCCLK) is sourced from an external crystal located on the PCB. Thus the timebase-frequency DT property should be defined by the board that uses the SoC, not the SoC itself. Drop the superfluous timebase-frequency property from the SoC DT data. (It's already present in the board DT data.) Signed-off-by: Paul Walmsley Reviewed-by: Bin Meng --- arch/riscv/boot/dts/sifive/fu540-c000.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi index 9bf63f0ab253..42b5ec223100 100644 --- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi @@ -21,7 +21,6 @@ cpus { #address-cells = <1>; #size-cells = <0>; - timebase-frequency = <1000000>; cpu0: cpu@0 { compatible = "sifive,e51", "sifive,rocket0", "riscv"; device_type = "cpu"; From b7edabfe843805b7ab8a91396b0782042a289308 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Thu, 25 Jul 2019 15:05:59 -0700 Subject: [PATCH 201/277] riscv: defconfig: align RV64 defconfig to the output of "make savedefconfig" Align the RV64 defconfig to the output of "make savedefconfig" to avoid unnecessary deltas for future defconfig patches. This patch should have no runtime functional impact. Signed-off-by: Paul Walmsley Reviewed-by: Bin Meng --- arch/riscv/configs/defconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index b7b749b18853..93205c0bf71d 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -34,6 +34,7 @@ CONFIG_PCIEPORTBUS=y CONFIG_PCI_HOST_GENERIC=y CONFIG_PCIE_XILINX=y CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y CONFIG_BLK_DEV_LOOP=y CONFIG_VIRTIO_BLK=y CONFIG_BLK_DEV_SD=y @@ -53,6 +54,8 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_EARLYCON_RISCV_SBI=y CONFIG_HVC_RISCV_SBI=y +CONFIG_SPI=y +CONFIG_SPI_SIFIVE=y # CONFIG_PTP_1588_CLOCK is not set CONFIG_DRM=y CONFIG_DRM_RADEON=y @@ -66,8 +69,9 @@ CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_PLATFORM=y CONFIG_USB_STORAGE=y CONFIG_USB_UAS=y +CONFIG_MMC=y +CONFIG_MMC_SPI=y CONFIG_VIRTIO_MMIO=y -CONFIG_SPI_SIFIVE=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_AUTOFS4_FS=y @@ -83,8 +87,4 @@ CONFIG_ROOT_NFS=y CONFIG_CRYPTO_USER_API_HASH=y CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_PRINTK_TIME=y -CONFIG_SPI=y -CONFIG_MMC_SPI=y -CONFIG_MMC=y -CONFIG_DEVTMPFS_MOUNT=y # CONFIG_RCU_TRACE is not set From 1b7e816fc80e668f0ccc8542cec20b9259abace1 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Wed, 31 Jul 2019 15:32:40 -0400 Subject: [PATCH 202/277] mm: slub: Fix slab walking for init_on_free To properly clear the slab on free with slab_want_init_on_free, we walk the list of free objects using get_freepointer/set_freepointer. The value we get from get_freepointer may not be valid. This isn't an issue since an actual value will get written later but this means there's a chance of triggering a bug if we use this value with set_freepointer: kernel BUG at mm/slub.c:306! invalid opcode: 0000 [#1] PREEMPT PTI CPU: 0 PID: 0 Comm: swapper Not tainted 5.2.0-05754-g6471384a #4 RIP: 0010:kfree+0x58a/0x5c0 Code: 48 83 05 78 37 51 02 01 0f 0b 48 83 05 7e 37 51 02 01 48 83 05 7e 37 51 02 01 48 83 05 7e 37 51 02 01 48 83 05 d6 37 51 02 01 <0f> 0b 48 83 05 d4 37 51 02 01 48 83 05 d4 37 51 02 01 48 83 05 d4 RSP: 0000:ffffffff82603d90 EFLAGS: 00010002 RAX: ffff8c3976c04320 RBX: ffff8c3976c04300 RCX: 0000000000000000 RDX: ffff8c3976c04300 RSI: 0000000000000000 RDI: ffff8c3976c04320 RBP: ffffffff82603db8 R08: 0000000000000000 R09: 0000000000000000 R10: ffff8c3976c04320 R11: ffffffff8289e1e0 R12: ffffd52cc8db0100 R13: ffff8c3976c01a00 R14: ffffffff810f10d4 R15: ffff8c3976c04300 FS: 0000000000000000(0000) GS:ffffffff8266b000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff8c397ffff000 CR3: 0000000125020000 CR4: 00000000000406b0 Call Trace: apply_wqattrs_prepare+0x154/0x280 apply_workqueue_attrs_locked+0x4e/0xe0 apply_workqueue_attrs+0x36/0x60 alloc_workqueue+0x25a/0x6d0 workqueue_init_early+0x246/0x348 start_kernel+0x3c7/0x7ec x86_64_start_reservations+0x40/0x49 x86_64_start_kernel+0xda/0xe4 secondary_startup_64+0xb6/0xc0 Modules linked in: ---[ end trace f67eb9af4d8d492b ]--- Fix this by ensuring the value we set with set_freepointer is either NULL or another value in the chain. Reported-by: kernel test robot Signed-off-by: Laura Abbott Fixes: 6471384af2a6 ("mm: security: introduce init_on_alloc=1 and init_on_free=1 boot options") Reviewed-by: Kees Cook Signed-off-by: Linus Torvalds --- mm/slub.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index e6c030e47364..8834563cdb4b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1432,7 +1432,9 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, void *old_tail = *tail ? *tail : *head; int rsize; - if (slab_want_init_on_free(s)) + if (slab_want_init_on_free(s)) { + void *p = NULL; + do { object = next; next = get_freepointer(s, object); @@ -1445,8 +1447,10 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, : 0; memset((char *)object + s->inuse, 0, s->size - s->inuse - rsize); - set_freepointer(s, object, next); + set_freepointer(s, object, p); + p = object; } while (object != old_tail); + } /* * Compiler cannot detect this function can be removed if slab_free_hook() From b36a1552d7319bbfd5cf7f08726c23c5c66d4f73 Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Tue, 30 Jul 2019 11:33:45 +0200 Subject: [PATCH 203/277] Bluetooth: hci_uart: check for missing tty operations Certain ttys operations (pty_unix98_ops) lack tiocmget() and tiocmset() functions which are called by the certain HCI UART protocols (hci_ath, hci_bcm, hci_intel, hci_mrvl, hci_qca) via hci_uart_set_flow_control() or directly. This leads to an execution at NULL and can be triggered by an unprivileged user. Fix this by adding a helper function and a check for the missing tty operations in the protocols code. This fixes CVE-2019-10207. The Fixes: lines list commits where calls to tiocm[gs]et() or hci_uart_set_flow_control() were added to the HCI UART protocols. Link: https://syzkaller.appspot.com/bug?id=1b42faa2848963564a5b1b7f8c837ea7b55ffa50 Reported-by: syzbot+79337b501d6aa974d0f6@syzkaller.appspotmail.com Cc: stable@vger.kernel.org # v2.6.36+ Fixes: b3190df62861 ("Bluetooth: Support for Atheros AR300x serial chip") Fixes: 118612fb9165 ("Bluetooth: hci_bcm: Add suspend/resume PM functions") Fixes: ff2895592f0f ("Bluetooth: hci_intel: Add Intel baudrate configuration support") Fixes: 162f812f23ba ("Bluetooth: hci_uart: Add Marvell support") Fixes: fa9ad876b8e0 ("Bluetooth: hci_qca: Add support for Qualcomm Bluetooth chip wcn3990") Signed-off-by: Vladis Dronov Signed-off-by: Marcel Holtmann Reviewed-by: Yu-Chen, Cho Tested-by: Yu-Chen, Cho Signed-off-by: Linus Torvalds --- drivers/bluetooth/hci_ath.c | 3 +++ drivers/bluetooth/hci_bcm.c | 3 +++ drivers/bluetooth/hci_intel.c | 3 +++ drivers/bluetooth/hci_ldisc.c | 13 +++++++++++++ drivers/bluetooth/hci_mrvl.c | 3 +++ drivers/bluetooth/hci_qca.c | 3 +++ drivers/bluetooth/hci_uart.h | 1 + 7 files changed, 29 insertions(+) diff --git a/drivers/bluetooth/hci_ath.c b/drivers/bluetooth/hci_ath.c index a55be205b91a..dbfe34664633 100644 --- a/drivers/bluetooth/hci_ath.c +++ b/drivers/bluetooth/hci_ath.c @@ -98,6 +98,9 @@ static int ath_open(struct hci_uart *hu) BT_DBG("hu %p", hu); + if (!hci_uart_has_flow_control(hu)) + return -EOPNOTSUPP; + ath = kzalloc(sizeof(*ath), GFP_KERNEL); if (!ath) return -ENOMEM; diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 8905ad2edde7..ae2624fce913 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -406,6 +406,9 @@ static int bcm_open(struct hci_uart *hu) bt_dev_dbg(hu->hdev, "hu %p", hu); + if (!hci_uart_has_flow_control(hu)) + return -EOPNOTSUPP; + bcm = kzalloc(sizeof(*bcm), GFP_KERNEL); if (!bcm) return -ENOMEM; diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c index 207bae5e0d46..31f25153087d 100644 --- a/drivers/bluetooth/hci_intel.c +++ b/drivers/bluetooth/hci_intel.c @@ -391,6 +391,9 @@ static int intel_open(struct hci_uart *hu) BT_DBG("hu %p", hu); + if (!hci_uart_has_flow_control(hu)) + return -EOPNOTSUPP; + intel = kzalloc(sizeof(*intel), GFP_KERNEL); if (!intel) return -ENOMEM; diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index 8950e07889fe..85a30fb9177b 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -292,6 +292,19 @@ static int hci_uart_send_frame(struct hci_dev *hdev, struct sk_buff *skb) return 0; } +/* Check the underlying device or tty has flow control support */ +bool hci_uart_has_flow_control(struct hci_uart *hu) +{ + /* serdev nodes check if the needed operations are present */ + if (hu->serdev) + return true; + + if (hu->tty->driver->ops->tiocmget && hu->tty->driver->ops->tiocmset) + return true; + + return false; +} + /* Flow control or un-flow control the device */ void hci_uart_set_flow_control(struct hci_uart *hu, bool enable) { diff --git a/drivers/bluetooth/hci_mrvl.c b/drivers/bluetooth/hci_mrvl.c index f98e5cc343b2..fbc3f7c3a5c7 100644 --- a/drivers/bluetooth/hci_mrvl.c +++ b/drivers/bluetooth/hci_mrvl.c @@ -59,6 +59,9 @@ static int mrvl_open(struct hci_uart *hu) BT_DBG("hu %p", hu); + if (!hci_uart_has_flow_control(hu)) + return -EOPNOTSUPP; + mrvl = kzalloc(sizeof(*mrvl), GFP_KERNEL); if (!mrvl) return -ENOMEM; diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 9a5c9c1f9484..82a0a3691a63 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -473,6 +473,9 @@ static int qca_open(struct hci_uart *hu) BT_DBG("hu %p qca_open", hu); + if (!hci_uart_has_flow_control(hu)) + return -EOPNOTSUPP; + qca = kzalloc(sizeof(struct qca_data), GFP_KERNEL); if (!qca) return -ENOMEM; diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h index f11af3912ce6..6ab631101019 100644 --- a/drivers/bluetooth/hci_uart.h +++ b/drivers/bluetooth/hci_uart.h @@ -104,6 +104,7 @@ int hci_uart_wait_until_sent(struct hci_uart *hu); int hci_uart_init_ready(struct hci_uart *hu); void hci_uart_init_work(struct work_struct *work); void hci_uart_set_baudrate(struct hci_uart *hu, unsigned int speed); +bool hci_uart_has_flow_control(struct hci_uart *hu); void hci_uart_set_flow_control(struct hci_uart *hu, bool enable); void hci_uart_set_speeds(struct hci_uart *hu, unsigned int init_speed, unsigned int oper_speed); From 3de433c5b38af49a5fc7602721e2ab5d39f1e69c Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 30 Jul 2019 14:46:28 -0700 Subject: [PATCH 204/277] drm/msm: Use the correct dma_sync calls in msm_gem [subject was: drm/msm: shake fist angrily at dma-mapping] So, using dma_sync_* for our cache needs works out w/ dma iommu ops, but it falls appart with dma direct ops. The problem is that, depending on display generation, we can have either set of dma ops (mdp4 and dpu have iommu wired to mdss node, which maps to toplevel drm device, but mdp5 has iommu wired up to the mdp sub-node within mdss). Fixes this splat on mdp5 devices: Unable to handle kernel paging request at virtual address ffffffff80000000 Mem abort info: ESR = 0x96000144 Exception class = DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000144 CM = 1, WnR = 1 swapper pgtable: 4k pages, 48-bit VAs, pgdp=00000000810e4000 [ffffffff80000000] pgd=0000000000000000 Internal error: Oops: 96000144 [#1] SMP Modules linked in: btqcomsmd btqca bluetooth cfg80211 ecdh_generic ecc rfkill libarc4 panel_simple msm wcnss_ctrl qrtr_smd drm_kms_helper venus_enc venus_dec videobuf2_dma_sg videobuf2_memops drm venus_core ipv6 qrtr qcom_wcnss_pil v4l2_mem2mem qcom_sysmon videobuf2_v4l2 qmi_helpers videobuf2_common crct10dif_ce mdt_loader qcom_common videodev qcom_glink_smem remoteproc bmc150_accel_i2c bmc150_magn_i2c bmc150_accel_core bmc150_magn snd_soc_lpass_apq8016 snd_soc_msm8916_analog mms114 mc nf_defrag_ipv6 snd_soc_lpass_cpu snd_soc_apq8016_sbc industrialio_triggered_buffer kfifo_buf snd_soc_lpass_platform snd_soc_msm8916_digital drm_panel_orientation_quirks CPU: 2 PID: 33 Comm: kworker/2:1 Not tainted 5.3.0-rc2 #1 Hardware name: Samsung Galaxy A5U (EUR) (DT) Workqueue: events deferred_probe_work_func pstate: 80000005 (Nzcv daif -PAN -UAO) pc : __clean_dcache_area_poc+0x20/0x38 lr : arch_sync_dma_for_device+0x28/0x30 sp : ffff0000115736a0 x29: ffff0000115736a0 x28: 0000000000000001 x27: ffff800074830800 x26: ffff000011478000 x25: 0000000000000000 x24: 0000000000000001 x23: ffff000011478a98 x22: ffff800009fd1c10 x21: 0000000000000001 x20: ffff800075ad0a00 x19: 0000000000000000 x18: ffff0000112b2000 x17: 0000000000000000 x16: 0000000000000000 x15: 00000000fffffff0 x14: ffff000011455d70 x13: 0000000000000000 x12: 0000000000000028 x11: 0000000000000001 x10: ffff00001106c000 x9 : ffff7e0001d6b380 x8 : 0000000000001000 x7 : ffff7e0001d6b380 x6 : ffff7e0001d6b382 x5 : 0000000000000000 x4 : 0000000000001000 x3 : 000000000000003f x2 : 0000000000000040 x1 : ffffffff80001000 x0 : ffffffff80000000 Call trace: __clean_dcache_area_poc+0x20/0x38 dma_direct_sync_sg_for_device+0xb8/0xe8 get_pages+0x22c/0x250 [msm] msm_gem_get_and_pin_iova+0xdc/0x168 [msm] ... Fixes the combination of two patches: Fixes: 0036bc73ccbe (drm/msm: stop abusing dma_map/unmap for cache) Fixes: 449fa54d6815 (dma-direct: correct the physical addr in dma_direct_sync_sg_for_cpu/device) Tested-by: Stephan Gerhold Signed-off-by: Rob Clark [seanpaul changed subject to something more desriptive] Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20190730214633.17820-1-robdclark@gmail.com --- drivers/gpu/drm/msm/msm_gem.c | 47 +++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index c2114c748c2f..8cf6362e64bf 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -32,6 +32,46 @@ static bool use_pages(struct drm_gem_object *obj) return !msm_obj->vram_node; } +/* + * Cache sync.. this is a bit over-complicated, to fit dma-mapping + * API. Really GPU cache is out of scope here (handled on cmdstream) + * and all we need to do is invalidate newly allocated pages before + * mapping to CPU as uncached/writecombine. + * + * On top of this, we have the added headache, that depending on + * display generation, the display's iommu may be wired up to either + * the toplevel drm device (mdss), or to the mdp sub-node, meaning + * that here we either have dma-direct or iommu ops. + * + * Let this be a cautionary tail of abstraction gone wrong. + */ + +static void sync_for_device(struct msm_gem_object *msm_obj) +{ + struct device *dev = msm_obj->base.dev->dev; + + if (get_dma_ops(dev)) { + dma_sync_sg_for_device(dev, msm_obj->sgt->sgl, + msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + } else { + dma_map_sg(dev, msm_obj->sgt->sgl, + msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + } +} + +static void sync_for_cpu(struct msm_gem_object *msm_obj) +{ + struct device *dev = msm_obj->base.dev->dev; + + if (get_dma_ops(dev)) { + dma_sync_sg_for_cpu(dev, msm_obj->sgt->sgl, + msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + } else { + dma_unmap_sg(dev, msm_obj->sgt->sgl, + msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + } +} + /* allocate pages from VRAM carveout, used when no IOMMU: */ static struct page **get_pages_vram(struct drm_gem_object *obj, int npages) { @@ -97,8 +137,7 @@ static struct page **get_pages(struct drm_gem_object *obj) * because display controller, GPU, etc. are not coherent: */ if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED)) - dma_sync_sg_for_device(dev->dev, msm_obj->sgt->sgl, - msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + sync_for_device(msm_obj); } return msm_obj->pages; @@ -127,9 +166,7 @@ static void put_pages(struct drm_gem_object *obj) * GPU, etc. are not coherent: */ if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED)) - dma_sync_sg_for_cpu(obj->dev->dev, msm_obj->sgt->sgl, - msm_obj->sgt->nents, - DMA_BIDIRECTIONAL); + sync_for_cpu(msm_obj); sg_free_table(msm_obj->sgt); kfree(msm_obj->sgt); From 45385237f65aeee73641f1ef737d7273905a233f Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Thu, 25 Jul 2019 12:52:43 +0200 Subject: [PATCH 205/277] selinux: fix memory leak in policydb_init() Since roles_init() adds some entries to the role hash table, we need to destroy also its keys/values on error, otherwise we get a memory leak in the error path. Cc: Reported-by: syzbot+fee3a14d4cdf92646287@syzkaller.appspotmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Ondrej Mosnacek Signed-off-by: Paul Moore --- security/selinux/ss/policydb.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index daecdfb15a9c..38d0083204f1 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -274,6 +274,8 @@ static int rangetr_cmp(struct hashtab *h, const void *k1, const void *k2) return v; } +static int (*destroy_f[SYM_NUM]) (void *key, void *datum, void *datap); + /* * Initialize a policy database structure. */ @@ -321,8 +323,10 @@ static int policydb_init(struct policydb *p) out: hashtab_destroy(p->filename_trans); hashtab_destroy(p->range_tr); - for (i = 0; i < SYM_NUM; i++) + for (i = 0; i < SYM_NUM; i++) { + hashtab_map(p->symtab[i].table, destroy_f[i], NULL); hashtab_destroy(p->symtab[i].table); + } return rc; } From 50f6393f9654c561df4cdcf8e6cfba7260143601 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 14 Jun 2019 07:46:02 +0200 Subject: [PATCH 206/277] xen/swiotlb: fix condition for calling xen_destroy_contiguous_region() The condition in xen_swiotlb_free_coherent() for deciding whether to call xen_destroy_contiguous_region() is wrong: in case the region to be freed is not contiguous calling xen_destroy_contiguous_region() is the wrong thing to do: it would result in inconsistent mappings of multiple PFNs to the same MFN. This will lead to various strange crashes or data corruption. Instead of calling xen_destroy_contiguous_region() in that case a warning should be issued as that situation should never occur. Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Reviewed-by: Jan Beulich Acked-by: Konrad Rzeszutek Wilk Signed-off-by: Juergen Gross --- drivers/xen/swiotlb-xen.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index d53f3493a6b9..50fd7de54969 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -361,8 +361,8 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, /* Convert the size to actually allocated. */ size = 1UL << (order + XEN_PAGE_SHIFT); - if (((dev_addr + size - 1 <= dma_mask)) || - range_straddles_page_boundary(phys, size)) + if (!WARN_ON((dev_addr + size - 1 > dma_mask) || + range_straddles_page_boundary(phys, size))) xen_destroy_contiguous_region(phys, order); xen_free_coherent_pages(hwdev, size, vaddr, (dma_addr_t)phys, attrs); From bf70726668c6116aa4976e0cc87f470be6268a2f Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 14 Jun 2019 07:46:03 +0200 Subject: [PATCH 207/277] xen/swiotlb: simplify range_straddles_page_boundary() range_straddles_page_boundary() is open coding several macros from include/xen/page.h. Use those instead. Additionally there is no need to have check_pages_physically_contiguous() as a separate function as it is used only once, so merge it into range_straddles_page_boundary(). Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Acked-by: Konrad Rzeszutek Wilk Signed-off-by: Juergen Gross --- drivers/xen/swiotlb-xen.c | 34 +++++++++------------------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 50fd7de54969..37ddcfcfbb21 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -83,34 +83,18 @@ static inline dma_addr_t xen_virt_to_bus(void *address) return xen_phys_to_bus(virt_to_phys(address)); } -static int check_pages_physically_contiguous(unsigned long xen_pfn, - unsigned int offset, - size_t length) -{ - unsigned long next_bfn; - int i; - int nr_pages; - - next_bfn = pfn_to_bfn(xen_pfn); - nr_pages = (offset + length + XEN_PAGE_SIZE-1) >> XEN_PAGE_SHIFT; - - for (i = 1; i < nr_pages; i++) { - if (pfn_to_bfn(++xen_pfn) != ++next_bfn) - return 0; - } - return 1; -} - static inline int range_straddles_page_boundary(phys_addr_t p, size_t size) { - unsigned long xen_pfn = XEN_PFN_DOWN(p); - unsigned int offset = p & ~XEN_PAGE_MASK; + unsigned long next_bfn, xen_pfn = XEN_PFN_DOWN(p); + unsigned int i, nr_pages = XEN_PFN_UP(xen_offset_in_page(p) + size); - if (offset + size <= XEN_PAGE_SIZE) - return 0; - if (check_pages_physically_contiguous(xen_pfn, offset, size)) - return 0; - return 1; + next_bfn = pfn_to_bfn(xen_pfn); + + for (i = 1; i < nr_pages; i++) + if (pfn_to_bfn(++xen_pfn) != ++next_bfn) + return 1; + + return 0; } static int is_xen_swiotlb_buffer(dma_addr_t dma_addr) From b877ac9815a8fe7e5f6d7fdde3dc34652408840a Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 14 Jun 2019 07:46:04 +0200 Subject: [PATCH 208/277] xen/swiotlb: remember having called xen_create_contiguous_region() Instead of always calling xen_destroy_contiguous_region() in case the memory is DMA-able for the used device, do so only in case it has been made DMA-able via xen_create_contiguous_region() before. This will avoid a lot of xen_destroy_contiguous_region() calls for 64-bit capable devices. As the memory in question is owned by swiotlb-xen the PG_owner_priv_1 flag of the first allocated page can be used for remembering. Signed-off-by: Juergen Gross Acked-by: Konrad Rzeszutek Wilk Signed-off-by: Juergen Gross --- drivers/xen/swiotlb-xen.c | 4 +++- include/linux/page-flags.h | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 37ddcfcfbb21..ceb681cf64bb 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -322,6 +322,7 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, xen_free_coherent_pages(hwdev, size, ret, (dma_addr_t)phys, attrs); return NULL; } + SetPageXenRemapped(virt_to_page(ret)); } memset(ret, 0, size); return ret; @@ -346,7 +347,8 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, size = 1UL << (order + XEN_PAGE_SHIFT); if (!WARN_ON((dev_addr + size - 1 > dma_mask) || - range_straddles_page_boundary(phys, size))) + range_straddles_page_boundary(phys, size)) && + TestClearPageXenRemapped(virt_to_page(vaddr))) xen_destroy_contiguous_region(phys, order); xen_free_coherent_pages(hwdev, size, vaddr, (dma_addr_t)phys, attrs); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index b848517da64c..f91cb8898ff0 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -152,6 +152,8 @@ enum pageflags { PG_savepinned = PG_dirty, /* Has a grant mapping of another (foreign) domain's page. */ PG_foreign = PG_owner_priv_1, + /* Remapped by swiotlb-xen. */ + PG_xen_remapped = PG_owner_priv_1, /* SLOB */ PG_slob_free = PG_private, @@ -329,6 +331,8 @@ PAGEFLAG(Pinned, pinned, PF_NO_COMPOUND) TESTSCFLAG(Pinned, pinned, PF_NO_COMPOUND) PAGEFLAG(SavePinned, savepinned, PF_NO_COMPOUND); PAGEFLAG(Foreign, foreign, PF_NO_COMPOUND); +PAGEFLAG(XenRemapped, xen_remapped, PF_NO_COMPOUND) + TESTCLEARFLAG(XenRemapped, xen_remapped, PF_NO_COMPOUND) PAGEFLAG(Reserved, reserved, PF_NO_COMPOUND) __CLEARPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND) From 0de50e40fc685fed4d6896a379b123f859ffb17b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 26 Jun 2019 16:45:49 +0100 Subject: [PATCH 209/277] drm/i915: Lift intel_engines_resume() to callers Since the reset path wants to recover the engines itself, it only wants to reinitialise the hardware using i915_gem_init_hw(). Pull the call to intel_engines_resume() to the module init/resume path so we can avoid it during reset. Fixes: 79ffac8599c4 ("drm/i915: Invert the GEM wakeref hierarchy") Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Cc: Tvrtko Ursulin Cc: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20190626154549.10066-3-chris@chris-wilson.co.uk (cherry picked from commit 092be382a2602067766f190a113514d469162456) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 7 ++++--- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 24 ---------------------- drivers/gpu/drm/i915/gt/intel_engine_pm.h | 2 -- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 21 ++++++++++++++++++- drivers/gpu/drm/i915/gt/intel_gt_pm.h | 2 +- drivers/gpu/drm/i915/gt/intel_reset.c | 21 ++++++++++++++++++- drivers/gpu/drm/i915/i915_gem.c | 25 ++++++++++------------- 7 files changed, 56 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 05011d4a3b88..914b5d4112bb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -253,14 +253,15 @@ void i915_gem_resume(struct drm_i915_private *i915) i915_gem_restore_gtt_mappings(i915); i915_gem_restore_fences(i915); + if (i915_gem_init_hw(i915)) + goto err_wedged; + /* * As we didn't flush the kernel context before suspend, we cannot * guarantee that the context image is complete. So let's just reset * it and start again. */ - intel_gt_resume(i915); - - if (i915_gem_init_hw(i915)) + if (intel_gt_resume(i915)) goto err_wedged; intel_uc_resume(i915); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 2ce00d3dc42a..ae5b6baf6dff 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -142,27 +142,3 @@ void intel_engine_init__pm(struct intel_engine_cs *engine) { intel_wakeref_init(&engine->wakeref); } - -int intel_engines_resume(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err = 0; - - intel_gt_pm_get(i915); - for_each_engine(engine, i915, id) { - intel_engine_pm_get(engine); - engine->serial++; /* kernel context lost */ - err = engine->resume(engine); - intel_engine_pm_put(engine); - if (err) { - dev_err(i915->drm.dev, - "Failed to restart %s (%d)\n", - engine->name, err); - break; - } - } - intel_gt_pm_put(i915); - - return err; -} diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index b326cd993d60..f6f213fbc98c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -17,6 +17,4 @@ void intel_engine_park(struct intel_engine_cs *engine); void intel_engine_init__pm(struct intel_engine_cs *engine); -int intel_engines_resume(struct drm_i915_private *i915); - #endif /* INTEL_ENGINE_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 7b5967751762..9f8f7f54191f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -5,6 +5,7 @@ */ #include "i915_drv.h" +#include "intel_engine_pm.h" #include "intel_gt_pm.h" #include "intel_pm.h" #include "intel_wakeref.h" @@ -118,10 +119,11 @@ void intel_gt_sanitize(struct drm_i915_private *i915, bool force) intel_engine_reset(engine, false); } -void intel_gt_resume(struct drm_i915_private *i915) +int intel_gt_resume(struct drm_i915_private *i915) { struct intel_engine_cs *engine; enum intel_engine_id id; + int err = 0; /* * After resume, we may need to poke into the pinned kernel @@ -129,9 +131,12 @@ void intel_gt_resume(struct drm_i915_private *i915) * Only the kernel contexts should remain pinned over suspend, * allowing us to fixup the user contexts on their first pin. */ + intel_gt_pm_get(i915); for_each_engine(engine, i915, id) { struct intel_context *ce; + intel_engine_pm_get(engine); + ce = engine->kernel_context; if (ce) ce->ops->reset(ce); @@ -139,5 +144,19 @@ void intel_gt_resume(struct drm_i915_private *i915) ce = engine->preempt_context; if (ce) ce->ops->reset(ce); + + engine->serial++; /* kernel context lost */ + err = engine->resume(engine); + + intel_engine_pm_put(engine); + if (err) { + dev_err(i915->drm.dev, + "Failed to restart %s (%d)\n", + engine->name, err); + break; + } } + intel_gt_pm_put(i915); + + return err; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index 7dd1130a19a4..53f342b20181 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -22,6 +22,6 @@ void intel_gt_pm_put(struct drm_i915_private *i915); void intel_gt_pm_init(struct drm_i915_private *i915); void intel_gt_sanitize(struct drm_i915_private *i915, bool force); -void intel_gt_resume(struct drm_i915_private *i915); +int intel_gt_resume(struct drm_i915_private *i915); #endif /* INTEL_GT_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 4c478b38e420..0439ed66e969 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -951,6 +951,21 @@ static int do_reset(struct drm_i915_private *i915, return gt_reset(i915, stalled_mask); } +static int resume(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int ret; + + for_each_engine(engine, i915, id) { + ret = engine->resume(engine); + if (ret) + return ret; + } + + return 0; +} + /** * i915_reset - reset chip after a hang * @i915: #drm_i915_private to reset @@ -1024,9 +1039,13 @@ void i915_reset(struct drm_i915_private *i915, if (ret) { DRM_ERROR("Failed to initialise HW following reset (%d)\n", ret); - goto error; + goto taint; } + ret = resume(i915); + if (ret) + goto taint; + i915_queue_hangcheck(i915); finish: diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 190ad54fb072..8a659d3d7435 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -46,7 +46,6 @@ #include "gem/i915_gem_ioctls.h" #include "gem/i915_gem_pm.h" #include "gem/i915_gemfs.h" -#include "gt/intel_engine_pm.h" #include "gt/intel_gt_pm.h" #include "gt/intel_mocs.h" #include "gt/intel_reset.h" @@ -1307,21 +1306,13 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) intel_mocs_init_l3cc_table(dev_priv); - /* Only when the HW is re-initialised, can we replay the requests */ - ret = intel_engines_resume(dev_priv); - if (ret) - goto cleanup_uc; - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); intel_engines_set_scheduler_caps(dev_priv); return 0; -cleanup_uc: - intel_uc_fini_hw(dev_priv); out: intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - return ret; } @@ -1580,6 +1571,11 @@ int i915_gem_init(struct drm_i915_private *dev_priv) if (ret) goto err_uc_init; + /* Only when the HW is re-initialised, can we replay the requests */ + ret = intel_gt_resume(dev_priv); + if (ret) + goto err_init_hw; + /* * Despite its name intel_init_clock_gating applies both display * clock gating workarounds; GT mmio workarounds and the occasional @@ -1593,20 +1589,20 @@ int i915_gem_init(struct drm_i915_private *dev_priv) ret = intel_engines_verify_workarounds(dev_priv); if (ret) - goto err_init_hw; + goto err_gt; ret = __intel_engines_record_defaults(dev_priv); if (ret) - goto err_init_hw; + goto err_gt; if (i915_inject_load_failure()) { ret = -ENODEV; - goto err_init_hw; + goto err_gt; } if (i915_inject_load_failure()) { ret = -EIO; - goto err_init_hw; + goto err_gt; } intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); @@ -1620,7 +1616,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) * HW as irrevisibly wedged, but keep enough state around that the * driver doesn't explode during runtime. */ -err_init_hw: +err_gt: mutex_unlock(&dev_priv->drm.struct_mutex); i915_gem_set_wedged(dev_priv); @@ -1630,6 +1626,7 @@ err_init_hw: i915_gem_drain_workqueue(dev_priv); mutex_lock(&dev_priv->drm.struct_mutex); +err_init_hw: intel_uc_fini_hw(dev_priv); err_uc_init: intel_uc_fini(dev_priv); From b1fa6fd94fc6a5d6be85359743b5f3626f3f881c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 26 Jun 2019 16:45:47 +0100 Subject: [PATCH 210/277] drm/i915: Add a wakeref getter for iff the wakeref is already active For use in the next patch, we want to acquire a wakeref without having to wake the device up -- i.e. only acquire the engine wakeref if the engine is already active. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190626154549.10066-1-chris@chris-wilson.co.uk (cherry picked from commit de5147b8ce6d51f634661d7c531385371485cec6) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_engine_pm.h | 10 +++++++++- drivers/gpu/drm/i915/intel_wakeref.h | 15 +++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index f6f213fbc98c..a11c893f64c6 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -7,12 +7,20 @@ #ifndef INTEL_ENGINE_PM_H #define INTEL_ENGINE_PM_H +#include "intel_engine_types.h" +#include "intel_wakeref.h" + struct drm_i915_private; -struct intel_engine_cs; void intel_engine_pm_get(struct intel_engine_cs *engine); void intel_engine_pm_put(struct intel_engine_cs *engine); +static inline bool +intel_engine_pm_get_if_awake(struct intel_engine_cs *engine) +{ + return intel_wakeref_get_if_active(&engine->wakeref); +} + void intel_engine_park(struct intel_engine_cs *engine); void intel_engine_init__pm(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h index 9cbb2ebf575b..38275310b196 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.h +++ b/drivers/gpu/drm/i915/intel_wakeref.h @@ -65,6 +65,21 @@ intel_wakeref_get(struct intel_runtime_pm *rpm, return 0; } +/** + * intel_wakeref_get_if_in_use: Acquire the wakeref + * @wf: the wakeref + * + * Acquire a hold on the wakeref, but only if the wakeref is already + * active. + * + * Returns: true if the wakeref was acquired, false otherwise. + */ +static inline bool +intel_wakeref_get_if_active(struct intel_wakeref *wf) +{ + return atomic_inc_not_zero(&wf->count); +} + /** * intel_wakeref_put: Release the wakeref * @i915: the drm_i915_private device From 4b9bb9728c915c6079619e71e3340fe4840d9d40 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 26 Jun 2019 16:45:48 +0100 Subject: [PATCH 211/277] drm/i915: Only recover active engines If we issue a reset to a currently idle engine, leave it idle afterwards. This is useful to excise a linkage between reset and the shrinker. When waking the engine, we need to pin the default context image which we use for overwriting a guilty context -- if the engine is idle we do not need this pinned image! However, this pinning means that waking the engine acquires the FS_RECLAIM, and so may trigger the shrinker. The shrinker itself may need to wait upon the GPU to unbind and object and so may require services of reset; ergo we should avoid the engine wake up path. The danger in skipping the recovery for idle engines is that we leave the engine with no context defined, which may interfere with the operation of the power context on some older platforms. In practice, we should only be resetting an active GPU but it something to look out for on Ironlake (if memory serves). Fixes: 79ffac8599c4 ("drm/i915: Invert the GEM wakeref hierarchy") Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Cc: Tvrtko Ursulin Cc: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20190626154549.10066-2-chris@chris-wilson.co.uk (cherry picked from commit 18398904ca9e3ddd180e2ecd45886e146b1d9d5b) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_reset.c | 37 ++++++++++++++---------- drivers/gpu/drm/i915/gt/selftest_reset.c | 5 +++- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 0439ed66e969..3f907701ef4d 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -687,7 +687,6 @@ static void reset_prepare_engine(struct intel_engine_cs *engine) * written to the powercontext is undefined and so we may lose * GPU state upon resume, i.e. fail to restart after a reset. */ - intel_engine_pm_get(engine); intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); engine->reset.prepare(engine); } @@ -718,16 +717,21 @@ static void revoke_mmaps(struct drm_i915_private *i915) } } -static void reset_prepare(struct drm_i915_private *i915) +static intel_engine_mask_t reset_prepare(struct drm_i915_private *i915) { struct intel_engine_cs *engine; + intel_engine_mask_t awake = 0; enum intel_engine_id id; - intel_gt_pm_get(i915); - for_each_engine(engine, i915, id) + for_each_engine(engine, i915, id) { + if (intel_engine_pm_get_if_awake(engine)) + awake |= engine->mask; reset_prepare_engine(engine); + } intel_uc_reset_prepare(i915); + + return awake; } static void gt_revoke(struct drm_i915_private *i915) @@ -761,20 +765,22 @@ static int gt_reset(struct drm_i915_private *i915, static void reset_finish_engine(struct intel_engine_cs *engine) { engine->reset.finish(engine); - intel_engine_pm_put(engine); intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL); + + intel_engine_signal_breadcrumbs(engine); } -static void reset_finish(struct drm_i915_private *i915) +static void reset_finish(struct drm_i915_private *i915, + intel_engine_mask_t awake) { struct intel_engine_cs *engine; enum intel_engine_id id; for_each_engine(engine, i915, id) { reset_finish_engine(engine); - intel_engine_signal_breadcrumbs(engine); + if (awake & engine->mask) + intel_engine_pm_put(engine); } - intel_gt_pm_put(i915); } static void nop_submit_request(struct i915_request *request) @@ -798,6 +804,7 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915) { struct i915_gpu_error *error = &i915->gpu_error; struct intel_engine_cs *engine; + intel_engine_mask_t awake; enum intel_engine_id id; if (test_bit(I915_WEDGED, &error->flags)) @@ -817,7 +824,7 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915) * rolling the global seqno forward (since this would complete requests * for which we haven't set the fence error to EIO yet). */ - reset_prepare(i915); + awake = reset_prepare(i915); /* Even if the GPU reset fails, it should still stop the engines */ if (!INTEL_INFO(i915)->gpu_reset_clobbers_display) @@ -841,7 +848,7 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915) for_each_engine(engine, i915, id) engine->cancel_requests(engine); - reset_finish(i915); + reset_finish(i915, awake); GEM_TRACE("end\n"); } @@ -988,6 +995,7 @@ void i915_reset(struct drm_i915_private *i915, const char *reason) { struct i915_gpu_error *error = &i915->gpu_error; + intel_engine_mask_t awake; int ret; GEM_TRACE("flags=%lx\n", error->flags); @@ -1004,7 +1012,7 @@ void i915_reset(struct drm_i915_private *i915, dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason); error->reset_count++; - reset_prepare(i915); + awake = reset_prepare(i915); if (!intel_has_gpu_reset(i915)) { if (i915_modparams.reset) @@ -1049,7 +1057,7 @@ void i915_reset(struct drm_i915_private *i915, i915_queue_hangcheck(i915); finish: - reset_finish(i915); + reset_finish(i915, awake); unlock: mutex_unlock(&error->wedge_mutex); return; @@ -1100,7 +1108,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) GEM_TRACE("%s flags=%lx\n", engine->name, error->flags); GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); - if (!intel_wakeref_active(&engine->wakeref)) + if (!intel_engine_pm_get_if_awake(engine)) return 0; reset_prepare_engine(engine); @@ -1135,12 +1143,11 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) * process to program RING_MODE, HWSP and re-enable submission. */ ret = engine->resume(engine); - if (ret) - goto out; out: intel_engine_cancel_stop_cs(engine); reset_finish_engine(engine); + intel_engine_pm_put(engine); return ret; } diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 89da9e7cc1ba..b5c590c9ccba 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -71,13 +71,16 @@ static int igt_atomic_reset(void *arg) goto unlock; for (p = igt_atomic_phases; p->name; p++) { + intel_engine_mask_t awake; + GEM_TRACE("intel_gpu_reset under %s\n", p->name); + awake = reset_prepare(i915); p->critical_section_begin(); reset_prepare(i915); err = intel_gpu_reset(i915, ALL_ENGINES); - reset_finish(i915); p->critical_section_end(); + reset_finish(i915, awake); if (err) { pr_err("intel_gpu_reset failed under %s\n", p->name); From 3fe6c873af2f2247544debdbe51ec29f690a2ccf Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 1 Aug 2019 13:33:39 +0200 Subject: [PATCH 212/277] parisc: Fix build of compressed kernel even with debug enabled With debug info enabled (CONFIG_DEBUG_INFO=y) the resulting vmlinux may get that huge that we need to increase the start addresss for the decompression text section otherwise one will face a linker error. Reported-by: Sven Schnelle Tested-by: Sven Schnelle Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Helge Deller --- arch/parisc/boot/compressed/vmlinux.lds.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/boot/compressed/vmlinux.lds.S b/arch/parisc/boot/compressed/vmlinux.lds.S index bfd7872739a3..2ac3a643f2eb 100644 --- a/arch/parisc/boot/compressed/vmlinux.lds.S +++ b/arch/parisc/boot/compressed/vmlinux.lds.S @@ -48,8 +48,8 @@ SECTIONS *(.rodata.compressed) } - /* bootloader code and data starts behind area of extracted kernel */ - . = (SZ_end - SZparisc_kernel_start + KERNEL_BINARY_TEXT_START); + /* bootloader code and data starts at least behind area of extracted kernel */ + . = MAX(ABSOLUTE(.), (SZ_end - SZparisc_kernel_start + KERNEL_BINARY_TEXT_START)); /* align on next page boundary */ . = ALIGN(4096); From e50beea8e7738377b4fa664078547be338038ff9 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 1 Aug 2019 13:42:18 +0200 Subject: [PATCH 213/277] parisc: Strip debug info from kernel before creating compressed vmlinuz Same as on x86-64, strip the .comment, .note and debug sections from the Linux kernel before creating the compressed image for the boot loader. Reported-by: James Bottomley Reported-by: Sven Schnelle Cc: stable@vger.kernel.org # v4.20+ Signed-off-by: Helge Deller --- arch/parisc/boot/compressed/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/parisc/boot/compressed/Makefile b/arch/parisc/boot/compressed/Makefile index 2da8624e5cf6..3b28d1b92218 100644 --- a/arch/parisc/boot/compressed/Makefile +++ b/arch/parisc/boot/compressed/Makefile @@ -55,7 +55,8 @@ $(obj)/misc.o: $(obj)/sizes.h CPPFLAGS_vmlinux.lds += -I$(objtree)/$(obj) -DBOOTLOADER $(obj)/vmlinux.lds: $(obj)/sizes.h -$(obj)/vmlinux.bin: vmlinux +OBJCOPYFLAGS_vmlinux.bin := -R .comment -R .note -S +$(obj)/vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) vmlinux.bin.all-y := $(obj)/vmlinux.bin From f2c5ed0dd5004c2cff5c0e3d430a107576fcc17f Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 1 Aug 2019 13:47:03 +0200 Subject: [PATCH 214/277] parisc: Add archclean Makefile target Apparently we don't have an archclean target in our arch/parisc/Makefile, so files in there never get cleaned out by make mrproper. This, in turn means that the sizes.h file in arch/parisc/boot/compressed never gets removed and worse, when you transition to an O=build/parisc[64] build model it overrides the generated file. The upshot being my bzImage was building with a SZ_end that was too small. I fixed it by making mrproper clean everything. Signed-off-by: James Bottomley Cc: stable@vger.kernel.org # v4.20+ Signed-off-by: Helge Deller --- arch/parisc/Makefile | 3 +++ arch/parisc/boot/compressed/Makefile | 1 + 2 files changed, 4 insertions(+) diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index b10a1179291a..3b77d729057f 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -180,5 +180,8 @@ define archhelp @echo ' zinstall - Install compressed vmlinuz kernel' endef +archclean: + $(Q)$(MAKE) $(clean)=$(boot) + archheaders: $(Q)$(MAKE) $(build)=arch/parisc/kernel/syscalls all diff --git a/arch/parisc/boot/compressed/Makefile b/arch/parisc/boot/compressed/Makefile index 3b28d1b92218..1e5879c6a752 100644 --- a/arch/parisc/boot/compressed/Makefile +++ b/arch/parisc/boot/compressed/Makefile @@ -12,6 +12,7 @@ UBSAN_SANITIZE := n targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 targets += misc.o piggy.o sizes.h head.o real2.o firmware.o +targets += real2.S firmware.c KBUILD_CFLAGS := -D__KERNEL__ -O2 -DBOOTLOADER KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING From fd01eecdf9591453177d7b06faaabef8c300114a Mon Sep 17 00:00:00 2001 From: Rayagonda Kokatanur Date: Wed, 24 Jul 2019 13:58:27 +0530 Subject: [PATCH 215/277] i2c: iproc: Fix i2c master read more than 63 bytes Use SMBUS_MASTER_DATA_READ.MASTER_RD_STATUS bit to check for RX FIFO empty condition because SMBUS_MASTER_FIFO_CONTROL.MASTER_RX_PKT_COUNT is not updated for read >= 64 bytes. This fixes the issue when trying to read from the I2C slave more than 63 bytes. Fixes: c24b8d574b7c ("i2c: iproc: Extend I2C read up to 255 bytes") Cc: stable@kernel.org Signed-off-by: Rayagonda Kokatanur Reviewed-by: Ray Jui Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-bcm-iproc.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c index 2c7f145a036e..d7fd76baec92 100644 --- a/drivers/i2c/busses/i2c-bcm-iproc.c +++ b/drivers/i2c/busses/i2c-bcm-iproc.c @@ -392,16 +392,18 @@ static bool bcm_iproc_i2c_slave_isr(struct bcm_iproc_i2c_dev *iproc_i2c, static void bcm_iproc_i2c_read_valid_bytes(struct bcm_iproc_i2c_dev *iproc_i2c) { struct i2c_msg *msg = iproc_i2c->msg; + uint32_t val; /* Read valid data from RX FIFO */ while (iproc_i2c->rx_bytes < msg->len) { - if (!((iproc_i2c_rd_reg(iproc_i2c, M_FIFO_CTRL_OFFSET) >> M_FIFO_RX_CNT_SHIFT) - & M_FIFO_RX_CNT_MASK)) + val = iproc_i2c_rd_reg(iproc_i2c, M_RX_OFFSET); + + /* rx fifo empty */ + if (!((val >> M_RX_STATUS_SHIFT) & M_RX_STATUS_MASK)) break; msg->buf[iproc_i2c->rx_bytes] = - (iproc_i2c_rd_reg(iproc_i2c, M_RX_OFFSET) >> - M_RX_DATA_SHIFT) & M_RX_DATA_MASK; + (val >> M_RX_DATA_SHIFT) & M_RX_DATA_MASK; iproc_i2c->rx_bytes++; } } From d9b42dfab513c9130ee0458f2e6febb75c89d1c8 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 3 Jul 2019 09:58:18 +0200 Subject: [PATCH 216/277] drm/client: Support unmapping of DRM client buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DRM clients, such as the fbdev emulation, have their buffer objects mapped by default. Mapping a buffer implicitly prevents its relocation. Hence, the buffer may permanently consume video memory while it's allocated. This is a problem for drivers of low-memory devices, such as ast, mgag200 or older framebuffer hardware, which will then not have enough memory to display other content (e.g., X11). This patch introduces drm_client_buffer_vmap() and _vunmap(). Internal DRM clients can use these functions to unmap and remap buffer objects as needed. There's no reference counting for vmap operations. Callers are expected to either keep buffers mapped (as it is now), or call vmap and vunmap in pairs around code that accesses the mapped memory. v2: * remove several duplicated NULL-pointer checks v3: * style and typo fixes Signed-off-by: Thomas Zimmermann Reviewed-by: Noralf Trønnes Link: https://patchwork.freedesktop.org/patch/315831/ Signed-off-by: Gerd Hoffmann --- drivers/gpu/drm/drm_client.c | 68 ++++++++++++++++++++++++++++++------ include/drm/drm_client.h | 2 ++ 2 files changed, 59 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c index 410572f14257..fb107b24baae 100644 --- a/drivers/gpu/drm/drm_client.c +++ b/drivers/gpu/drm/drm_client.c @@ -281,22 +281,12 @@ drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 height, u buffer->gem = obj; - /* - * FIXME: The dependency on GEM here isn't required, we could - * convert the driver handle to a dma-buf instead and use the - * backend-agnostic dma-buf vmap support instead. This would - * require that the handle2fd prime ioctl is reworked to pull the - * fd_install step out of the driver backend hooks, to make that - * final step optional for internal users. - */ - vaddr = drm_gem_vmap(obj); + vaddr = drm_client_buffer_vmap(buffer); if (IS_ERR(vaddr)) { ret = PTR_ERR(vaddr); goto err_delete; } - buffer->vaddr = vaddr; - return buffer; err_delete: @@ -305,6 +295,62 @@ err_delete: return ERR_PTR(ret); } +/** + * drm_client_buffer_vmap - Map DRM client buffer into address space + * @buffer: DRM client buffer + * + * This function maps a client buffer into kernel address space. If the + * buffer is already mapped, it returns the mapping's address. + * + * Client buffer mappings are not ref'counted. Each call to + * drm_client_buffer_vmap() should be followed by a call to + * drm_client_buffer_vunmap(); or the client buffer should be mapped + * throughout its lifetime. The latter is the default. + * + * Returns: + * The mapped memory's address + */ +void *drm_client_buffer_vmap(struct drm_client_buffer *buffer) +{ + void *vaddr; + + if (buffer->vaddr) + return buffer->vaddr; + + /* + * FIXME: The dependency on GEM here isn't required, we could + * convert the driver handle to a dma-buf instead and use the + * backend-agnostic dma-buf vmap support instead. This would + * require that the handle2fd prime ioctl is reworked to pull the + * fd_install step out of the driver backend hooks, to make that + * final step optional for internal users. + */ + vaddr = drm_gem_vmap(buffer->gem); + if (IS_ERR(vaddr)) + return vaddr; + + buffer->vaddr = vaddr; + + return vaddr; +} +EXPORT_SYMBOL(drm_client_buffer_vmap); + +/** + * drm_client_buffer_vunmap - Unmap DRM client buffer + * @buffer: DRM client buffer + * + * This function removes a client buffer's memory mapping. This + * function is only required by clients that manage their buffers + * by themselves. By default, DRM client buffers are mapped throughout + * their entire lifetime. + */ +void drm_client_buffer_vunmap(struct drm_client_buffer *buffer) +{ + drm_gem_vunmap(buffer->gem, buffer->vaddr); + buffer->vaddr = NULL; +} +EXPORT_SYMBOL(drm_client_buffer_vunmap); + static void drm_client_buffer_rmfb(struct drm_client_buffer *buffer) { int ret; diff --git a/include/drm/drm_client.h b/include/drm/drm_client.h index 72d51d1e9dd9..5cf2c5dd8b1e 100644 --- a/include/drm/drm_client.h +++ b/include/drm/drm_client.h @@ -149,6 +149,8 @@ struct drm_client_buffer { struct drm_client_buffer * drm_client_framebuffer_create(struct drm_client_dev *client, u32 width, u32 height, u32 format); void drm_client_framebuffer_delete(struct drm_client_buffer *buffer); +void *drm_client_buffer_vmap(struct drm_client_buffer *buffer); +void drm_client_buffer_vunmap(struct drm_client_buffer *buffer); int drm_client_modeset_create(struct drm_client_dev *client); void drm_client_modeset_free(struct drm_client_dev *client); From 87e281f88f3aa4ed401554f793685bcb2463580a Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 3 Jul 2019 09:58:24 +0200 Subject: [PATCH 217/277] drm/fb-helper: Map DRM client buffer only when required MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch changes DRM clients to not map the buffer by default. The buffer, like any buffer object, should be mapped and unmapped when needed. An unmapped buffer object can be evicted to system memory and does not consume video ram until displayed. This allows to use generic fbdev emulation with drivers for low-memory devices, such as ast and mgag200. This change affects the generic framebuffer console. HW-based consoles map their console buffer once and keep it mapped. Userspace can mmap this buffer into its address space. The shadow-buffered framebuffer console only needs the buffer object to be mapped during updates. While not being updated from the shadow buffer, the buffer object can remain unmapped. Userspace will always mmap the shadow buffer. v2: * change DRM client to not map buffer by default * manually map client buffer for fbdev with HW framebuffer Signed-off-by: Thomas Zimmermann Reviewed-by: Noralf Trønnes Link: https://patchwork.freedesktop.org/patch/315830/ Signed-off-by: Gerd Hoffmann --- drivers/gpu/drm/drm_client.c | 16 ++++------------ drivers/gpu/drm/drm_fb_helper.c | 33 +++++++++++++++++++++++++-------- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c index fb107b24baae..e1dafb0cc5e2 100644 --- a/drivers/gpu/drm/drm_client.c +++ b/drivers/gpu/drm/drm_client.c @@ -254,7 +254,6 @@ drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 height, u struct drm_device *dev = client->dev; struct drm_client_buffer *buffer; struct drm_gem_object *obj; - void *vaddr; int ret; buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); @@ -281,12 +280,6 @@ drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 height, u buffer->gem = obj; - vaddr = drm_client_buffer_vmap(buffer); - if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - goto err_delete; - } - return buffer; err_delete: @@ -305,7 +298,7 @@ err_delete: * Client buffer mappings are not ref'counted. Each call to * drm_client_buffer_vmap() should be followed by a call to * drm_client_buffer_vunmap(); or the client buffer should be mapped - * throughout its lifetime. The latter is the default. + * throughout its lifetime. * * Returns: * The mapped memory's address @@ -339,10 +332,9 @@ EXPORT_SYMBOL(drm_client_buffer_vmap); * drm_client_buffer_vunmap - Unmap DRM client buffer * @buffer: DRM client buffer * - * This function removes a client buffer's memory mapping. This - * function is only required by clients that manage their buffers - * by themselves. By default, DRM client buffers are mapped throughout - * their entire lifetime. + * This function removes a client buffer's memory mapping. Calling this + * function is only required by clients that manage their buffer mappings + * by themselves. */ void drm_client_buffer_vunmap(struct drm_client_buffer *buffer) { diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 1984e5c54d58..7ba6a0255821 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -403,6 +403,7 @@ static void drm_fb_helper_dirty_work(struct work_struct *work) struct drm_clip_rect *clip = &helper->dirty_clip; struct drm_clip_rect clip_copy; unsigned long flags; + void *vaddr; spin_lock_irqsave(&helper->dirty_lock, flags); clip_copy = *clip; @@ -412,10 +413,18 @@ static void drm_fb_helper_dirty_work(struct work_struct *work) /* call dirty callback only when it has been really touched */ if (clip_copy.x1 < clip_copy.x2 && clip_copy.y1 < clip_copy.y2) { + /* Generic fbdev uses a shadow buffer */ - if (helper->buffer) + if (helper->buffer) { + vaddr = drm_client_buffer_vmap(helper->buffer); + if (IS_ERR(vaddr)) + return; drm_fb_helper_dirty_blit_real(helper, &clip_copy); + } helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, &clip_copy, 1); + + if (helper->buffer) + drm_client_buffer_vunmap(helper->buffer); } } @@ -2178,6 +2187,7 @@ int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper, struct drm_framebuffer *fb; struct fb_info *fbi; u32 format; + void *vaddr; DRM_DEBUG_KMS("surface width(%d), height(%d) and bpp(%d)\n", sizes->surface_width, sizes->surface_height, @@ -2200,13 +2210,7 @@ int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper, fbi->fbops = &drm_fbdev_fb_ops; fbi->screen_size = fb->height * fb->pitches[0]; fbi->fix.smem_len = fbi->screen_size; - fbi->screen_buffer = buffer->vaddr; - /* Shamelessly leak the physical address to user-space */ -#if IS_ENABLED(CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM) - if (drm_leak_fbdev_smem && fbi->fix.smem_start == 0) - fbi->fix.smem_start = - page_to_phys(virt_to_page(fbi->screen_buffer)); -#endif + drm_fb_helper_fill_info(fbi, fb_helper, sizes); if (fb->funcs->dirty) { @@ -2231,6 +2235,19 @@ int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper, fbi->fbdefio = &drm_fbdev_defio; fb_deferred_io_init(fbi); + } else { + /* buffer is mapped for HW framebuffer */ + vaddr = drm_client_buffer_vmap(fb_helper->buffer); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + + fbi->screen_buffer = vaddr; + /* Shamelessly leak the physical address to user-space */ +#if IS_ENABLED(CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM) + if (drm_leak_fbdev_smem && fbi->fix.smem_start == 0) + fbi->fix.smem_start = + page_to_phys(virt_to_page(fbi->screen_buffer)); +#endif } return 0; From 01b947afaa940327e7adf57070a4bf3d0bed9810 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 5 Jul 2019 09:31:00 +0200 Subject: [PATCH 218/277] drm/fb-helper: Instanciate shadow FB if configured in device's mode_config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Generic framebuffer emulation uses a shadow buffer for framebuffers with dirty() function. If drivers want to use the shadow FB without such a function, they can now set prefer_shadow or prefer_shadow_fbdev in their mode_config structures. The former flag is exported to userspace, the latter flag is fbdev-only. v3: * only schedule dirty worker if fbdev uses shadow fb * test shadow fb settings with boolean operators * use bool for struct drm_mode_config.prefer_shadow_fbdev * fix documentation comments Signed-off-by: Thomas Zimmermann Reviewed-by: Noralf Trønnes Tested-by: Noralf Trønnes Link: https://patchwork.freedesktop.org/patch/315834/ Signed-off-by: Gerd Hoffmann --- drivers/gpu/drm/drm_fb_helper.c | 18 +++++++++++++++--- include/drm/drm_mode_config.h | 7 +++++++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 7ba6a0255821..a7ba5b4902d6 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -421,7 +421,9 @@ static void drm_fb_helper_dirty_work(struct work_struct *work) return; drm_fb_helper_dirty_blit_real(helper, &clip_copy); } - helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, &clip_copy, 1); + if (helper->fb->funcs->dirty) + helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, + &clip_copy, 1); if (helper->buffer) drm_client_buffer_vunmap(helper->buffer); @@ -613,6 +615,16 @@ void drm_fb_helper_unlink_fbi(struct drm_fb_helper *fb_helper) } EXPORT_SYMBOL(drm_fb_helper_unlink_fbi); +static bool drm_fbdev_use_shadow_fb(struct drm_fb_helper *fb_helper) +{ + struct drm_device *dev = fb_helper->dev; + struct drm_framebuffer *fb = fb_helper->fb; + + return dev->mode_config.prefer_shadow_fbdev || + dev->mode_config.prefer_shadow || + fb->funcs->dirty; +} + static void drm_fb_helper_dirty(struct fb_info *info, u32 x, u32 y, u32 width, u32 height) { @@ -620,7 +632,7 @@ static void drm_fb_helper_dirty(struct fb_info *info, u32 x, u32 y, struct drm_clip_rect *clip = &helper->dirty_clip; unsigned long flags; - if (!helper->fb->funcs->dirty) + if (!drm_fbdev_use_shadow_fb(helper)) return; spin_lock_irqsave(&helper->dirty_lock, flags); @@ -2213,7 +2225,7 @@ int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper, drm_fb_helper_fill_info(fbi, fb_helper, sizes); - if (fb->funcs->dirty) { + if (drm_fbdev_use_shadow_fb(fb_helper)) { struct fb_ops *fbops; void *shadow; diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index 759d462d028b..f57eea0481e0 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -852,6 +852,13 @@ struct drm_mode_config { /* dumb ioctl parameters */ uint32_t preferred_depth, prefer_shadow; + /** + * @prefer_shadow_fbdev: + * + * Hint to framebuffer emulation to prefer shadow-fb rendering. + */ + bool prefer_shadow_fbdev; + /** * @quirk_addfb_prefer_xbgr_30bpp: * From 58540594570778fd149cd8c9b2bff61f2cefa8c9 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 3 Jul 2019 09:58:34 +0200 Subject: [PATCH 219/277] drm/bochs: Use shadow buffer for bochs framebuffer console MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bochs driver (and virtual hardware) requires buffer objects to reside in video ram to display them to the screen. So it can not display the framebuffer console because the respective buffer object is permanently pinned in system memory. Using a shadow buffer for the console solves this problem. The console emulation will pin the buffer object only during updates from the shadow buffer. Otherwise, the bochs driver can freely relocated the buffer between system memory and video ram. v2: * select shadow FB via struct drm_mode_config.prefer_shadow_fbdev Signed-off-by: Thomas Zimmermann Acked-by: Noralf Trønnes Link: https://patchwork.freedesktop.org/patch/315833/ Signed-off-by: Gerd Hoffmann --- drivers/gpu/drm/bochs/bochs_kms.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/bochs/bochs_kms.c b/drivers/gpu/drm/bochs/bochs_kms.c index bc19dbd531ef..359030d5d818 100644 --- a/drivers/gpu/drm/bochs/bochs_kms.c +++ b/drivers/gpu/drm/bochs/bochs_kms.c @@ -191,6 +191,7 @@ int bochs_kms_init(struct bochs_device *bochs) bochs->dev->mode_config.fb_base = bochs->fb_base; bochs->dev->mode_config.preferred_depth = 24; bochs->dev->mode_config.prefer_shadow = 0; + bochs->dev->mode_config.prefer_shadow_fbdev = 1; bochs->dev->mode_config.quirk_addfb_prefer_host_byte_order = true; bochs->dev->mode_config.funcs = &bochs_mode_funcs; From 677379bc9139ac24b310a281fcb21a2f04288353 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Mon, 29 Jul 2019 15:57:46 +0100 Subject: [PATCH 220/277] arm64: Lower priority mask for GIC_PRIO_IRQON On a system with two security states, if SCR_EL3.FIQ is cleared, non-secure IRQ priorities get shifted to fit the secure view but priority masks aren't. On such system, it turns out that GIC_PRIO_IRQON masks the priority of normal interrupts, which obviously ends up in a hang. Increase GIC_PRIO_IRQON value (i.e. lower priority) to make sure interrupts are not blocked by it. Cc: Oleg Nesterov Fixes: bd82d4bd21880b7c ("arm64: Fix incorrect irqflag restore for priority masking") Acked-by: Marc Zyngier Signed-off-by: Julien Thierry Signed-off-by: Catalin Marinas [will: fixed Fixes: tag] Signed-off-by: Will Deacon --- arch/arm64/include/asm/arch_gicv3.h | 6 ++++++ arch/arm64/include/asm/ptrace.h | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 79155a8cfe7c..89e4c8b79349 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -155,6 +155,12 @@ static inline void gic_pmr_mask_irqs(void) BUILD_BUG_ON(GICD_INT_DEF_PRI < (GIC_PRIO_IRQOFF | GIC_PRIO_PSR_I_SET)); BUILD_BUG_ON(GICD_INT_DEF_PRI >= GIC_PRIO_IRQON); + /* + * Need to make sure IRQON allows IRQs when SCR_EL3.FIQ is cleared + * and non-secure PMR accesses are not subject to the shifts that + * are applied to IRQ priorities + */ + BUILD_BUG_ON((0x80 | (GICD_INT_DEF_PRI >> 1)) >= GIC_PRIO_IRQON); gic_write_pmr(GIC_PRIO_IRQOFF); } diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index b1dd039023ef..1dcf63a9ac1f 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -30,7 +30,7 @@ * in the the priority mask, it indicates that PSR.I should be set and * interrupt disabling temporarily does not rely on IRQ priorities. */ -#define GIC_PRIO_IRQON 0xc0 +#define GIC_PRIO_IRQON 0xe0 #define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80) #define GIC_PRIO_PSR_I_SET (1 << 4) From ee07b93e7721ccd5d5b9fa6f0c10cb3fe2f1f4f9 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 25 Jul 2019 17:16:05 +0900 Subject: [PATCH 221/277] arm64: unwind: Prohibit probing on return_address() Prohibit probing on return_address() and subroutines which is called from return_address(), since the it is invoked from trace_hardirqs_off() which is also kprobe blacklisted. Reported-by: Naresh Kamboju Signed-off-by: Masami Hiramatsu Signed-off-by: Will Deacon --- arch/arm64/kernel/return_address.c | 3 +++ arch/arm64/kernel/stacktrace.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c index c4ae647d2306..a5e8b3b9d798 100644 --- a/arch/arm64/kernel/return_address.c +++ b/arch/arm64/kernel/return_address.c @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -29,6 +30,7 @@ static int save_return_addr(struct stackframe *frame, void *d) return 0; } } +NOKPROBE_SYMBOL(save_return_addr); void *return_address(unsigned int level) { @@ -49,3 +51,4 @@ void *return_address(unsigned int level) return NULL; } EXPORT_SYMBOL_GPL(return_address); +NOKPROBE_SYMBOL(return_address); diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 2b160ae594eb..a336cb124320 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -111,6 +112,7 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) return 0; } +NOKPROBE_SYMBOL(unwind_frame); void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data) @@ -125,6 +127,7 @@ void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame, break; } } +NOKPROBE_SYMBOL(walk_stackframe); #ifdef CONFIG_STACKTRACE struct stack_trace_data { From 760d8ed069c4e32a92e2ba251a3b0d9a87a3e771 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 25 Jul 2019 17:16:25 +0900 Subject: [PATCH 222/277] arm64: Remove unneeded rcu_read_lock from debug handlers Remove rcu_read_lock()/rcu_read_unlock() from debug exception handlers since we are sure those are not preemptible and interrupts are off. Acked-by: Paul E. McKenney Signed-off-by: Masami Hiramatsu Signed-off-by: Will Deacon --- arch/arm64/kernel/debug-monitors.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index f8719bd30850..48222a4760c2 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -207,16 +207,16 @@ static int call_step_hook(struct pt_regs *regs, unsigned int esr) list = user_mode(regs) ? &user_step_hook : &kernel_step_hook; - rcu_read_lock(); - + /* + * Since single-step exception disables interrupt, this function is + * entirely not preemptible, and we can use rcu list safely here. + */ list_for_each_entry_rcu(hook, list, node) { retval = hook->fn(regs, esr); if (retval == DBG_HOOK_HANDLED) break; } - rcu_read_unlock(); - return retval; } NOKPROBE_SYMBOL(call_step_hook); @@ -305,14 +305,16 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr) list = user_mode(regs) ? &user_break_hook : &kernel_break_hook; - rcu_read_lock(); + /* + * Since brk exception disables interrupt, this function is + * entirely not preemptible, and we can use rcu list safely here. + */ list_for_each_entry_rcu(hook, list, node) { unsigned int comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK; if ((comment & ~hook->mask) == hook->imm) fn = hook->fn; } - rcu_read_unlock(); return fn ? fn(regs, esr) : DBG_HOOK_ERROR; } From 7d4e2dcf311d3b98421d1f119efe5964cafa32fc Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Wed, 31 Jul 2019 16:05:45 -0400 Subject: [PATCH 223/277] arm64/mm: fix variable 'pud' set but not used GCC throws a warning, arch/arm64/mm/mmu.c: In function 'pud_free_pmd_page': arch/arm64/mm/mmu.c:1033:8: warning: variable 'pud' set but not used [-Wunused-but-set-variable] pud_t pud; ^~~ because pud_table() is a macro and compiled away. Fix it by making it a static inline function and for pud_sect() as well. Signed-off-by: Qian Cai Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 3f5461f7b560..5fdcfe237338 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -447,8 +447,8 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, PMD_TYPE_SECT) #if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3 -#define pud_sect(pud) (0) -#define pud_table(pud) (1) +static inline bool pud_sect(pud_t pud) { return false; } +static inline bool pud_table(pud_t pud) { return true; } #else #define pud_sect(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \ PUD_TYPE_SECT) From 2bab52af6fe68c43b327a57e5ce5fc10eefdfadf Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Fri, 31 May 2019 05:46:15 -0400 Subject: [PATCH 224/277] drm/msm: add support for per-CRTC max_vblank_count on mdp5 The mdp5 drm/kms driver currently does not work on command-mode DSI panels due to 'vblank wait timed out' errors. This causes a latency of seconds, or tens of seconds in some cases, before content is shown on the panel. This hardware does not have the something that we can use as a frame counter available when running in command mode, so we need to fall back to using timestamps by setting the max_vblank_count to zero. This can be done on a per-CRTC basis, so the convert mdp5 to use drm_crtc_set_max_vblank_count(). This change was tested on a LG Nexus 5 (hammerhead) phone. Suggested-by: Jeffrey Hugo Reviewed-by: Jeffrey Hugo Signed-off-by: Brian Masney Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20190531094619.31704-3-masneyb@onstation.org --- drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c | 16 +++++++++++++++- drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c | 2 +- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c index ff14555372d0..78d5fa230c16 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c @@ -439,6 +439,18 @@ static void mdp5_crtc_atomic_disable(struct drm_crtc *crtc, mdp5_crtc->enabled = false; } +static void mdp5_crtc_vblank_on(struct drm_crtc *crtc) +{ + struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state); + struct mdp5_interface *intf = mdp5_cstate->pipeline.intf; + u32 count; + + count = intf->mode == MDP5_INTF_DSI_MODE_COMMAND ? 0 : 0xffffffff; + drm_crtc_set_max_vblank_count(crtc, count); + + drm_crtc_vblank_on(crtc); +} + static void mdp5_crtc_atomic_enable(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { @@ -475,7 +487,7 @@ static void mdp5_crtc_atomic_enable(struct drm_crtc *crtc, } /* Restore vblank irq handling after power is enabled */ - drm_crtc_vblank_on(crtc); + mdp5_crtc_vblank_on(crtc); mdp5_crtc_mode_set_nofb(crtc); @@ -1028,6 +1040,8 @@ static void mdp5_crtc_reset(struct drm_crtc *crtc) mdp5_crtc_destroy_state(crtc, crtc->state); __drm_atomic_helper_crtc_reset(crtc, &mdp5_cstate->base); + + drm_crtc_vblank_reset(crtc); } static const struct drm_crtc_funcs mdp5_crtc_funcs = { diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index 4a60f5fca6b0..fec6ef1ae3b9 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -740,7 +740,7 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev) dev->driver->get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos; dev->driver->get_scanout_position = mdp5_get_scanoutpos; dev->driver->get_vblank_counter = mdp5_get_vblank_counter; - dev->max_vblank_count = 0xffffffff; + dev->max_vblank_count = 0; /* max_vblank_count is set on each CRTC */ dev->vblank_disable_immediate = true; return kms; From c14b5dce5ece48035cfd0aa951b39c69ad5056f4 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Thu, 25 Jul 2019 10:53:08 -0600 Subject: [PATCH 225/277] drm/msm: Annotate intentional switch statement fall throughs Explicitly mark intentional fall throughs in switch statements to keep -Wimplicit-fallthrough from complaining. Reviewed-by: Rob Clark Signed-off-by: Jordan Crouse Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/1564073588-27386-1-git-send-email-jcrouse@codeaurora.org --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 2 ++ drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 1 + drivers/gpu/drm/msm/adreno/adreno_gpu.c | 1 + 3 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 1671db47aa57..e9c55d1d6c04 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -59,6 +59,7 @@ static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: if (priv->lastctx == ctx) break; + /* fall-thru */ case MSM_SUBMIT_CMD_BUF: /* copy commands into RB: */ obj = submit->bos[submit->cmd[i].idx].obj; @@ -149,6 +150,7 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: if (priv->lastctx == ctx) break; + /* fall-thru */ case MSM_SUBMIT_CMD_BUF: OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index be39cf01e51e..dc8ec2c94301 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -115,6 +115,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: if (priv->lastctx == ctx) break; + /* fall-thru */ case MSM_SUBMIT_CMD_BUF: OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 9acbbc0f3232..048c8be426f3 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -428,6 +428,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, /* ignore if there has not been a ctx switch: */ if (priv->lastctx == ctx) break; + /* fall-thru */ case MSM_SUBMIT_CMD_BUF: OUT_PKT3(ring, adreno_is_a430(adreno_gpu) ? CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2); From 7732d20a160c76006c7fe7bca5178aea6af1d2e8 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 1 Aug 2019 10:47:05 -0400 Subject: [PATCH 226/277] arm64/mm: fix variable 'tag' set but not used When CONFIG_KASAN_SW_TAGS=n, set_tag() is compiled away. GCC throws a warning, mm/kasan/common.c: In function '__kasan_kmalloc': mm/kasan/common.c:464:5: warning: variable 'tag' set but not used [-Wunused-but-set-variable] u8 tag = 0xff; ^~~ Fix it by making __tag_set() a static inline function the same as arch_kasan_set_tag() in mm/kasan/kasan.h for consistency because there is a macro in arch/arm64/include/asm/kasan.h, #define arch_kasan_set_tag(addr, tag) __tag_set(addr, tag) However, when CONFIG_DEBUG_VIRTUAL=n and CONFIG_SPARSEMEM_VMEMMAP=y, page_to_virt() will call __tag_set() with incorrect type of a parameter, so fix that as well. Also, still let page_to_virt() return "void *" instead of "const void *", so will not need to add a similar cast in lowmem_page_address(). Signed-off-by: Qian Cai Signed-off-by: Will Deacon --- arch/arm64/include/asm/memory.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index b7ba75809751..fb04f10a78ab 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -210,7 +210,11 @@ extern u64 vabits_user; #define __tag_reset(addr) untagged_addr(addr) #define __tag_get(addr) (__u8)((u64)(addr) >> 56) #else -#define __tag_set(addr, tag) (addr) +static inline const void *__tag_set(const void *addr, u8 tag) +{ + return addr; +} + #define __tag_reset(addr) (addr) #define __tag_get(addr) 0 #endif @@ -301,8 +305,8 @@ static inline void *phys_to_virt(phys_addr_t x) #define page_to_virt(page) ({ \ unsigned long __addr = \ ((__page_to_voff(page)) | PAGE_OFFSET); \ - unsigned long __addr_tag = \ - __tag_set(__addr, page_kasan_tag(page)); \ + const void *__addr_tag = \ + __tag_set((void *)__addr, page_kasan_tag(page)); \ ((void *)__addr_tag); \ }) From 61f259821dd3306e49b7d42a3f90fb5a4ff3351b Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Tue, 30 Jul 2019 21:39:57 -0700 Subject: [PATCH 227/277] IB/core: Add mitigation for Spectre V1 Some processors may mispredict an array bounds check and speculatively access memory that they should not. With a user supplied array index we like to play things safe by masking the value with the array size before it is used as an index. Signed-off-by: Tony Luck Link: https://lore.kernel.org/r/20190731043957.GA1600@agluck-desk2.amr.corp.intel.com Signed-off-by: Doug Ledford --- drivers/infiniband/core/user_mad.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 9f8a48016b41..ffdeaf6e0b68 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -49,6 +49,7 @@ #include #include #include +#include #include @@ -884,11 +885,14 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg) if (get_user(id, arg)) return -EFAULT; + if (id >= IB_UMAD_MAX_AGENTS) + return -EINVAL; mutex_lock(&file->port->file_mutex); mutex_lock(&file->mutex); - if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { + id = array_index_nospec(id, IB_UMAD_MAX_AGENTS); + if (!__get_agent(file, id)) { ret = -EINVAL; goto out; } From 621e55ff5b8e0ab5d1063f0eae0ef3960bef8f6e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 31 Jul 2019 11:18:40 +0300 Subject: [PATCH 228/277] RDMA/devices: Do not deadlock during client removal lockdep reports: WARNING: possible circular locking dependency detected modprobe/302 is trying to acquire lock: 0000000007c8919c ((wq_completion)ib_cm){+.+.}, at: flush_workqueue+0xdf/0x990 but task is already holding lock: 000000002d3d2ca9 (&device->client_data_rwsem){++++}, at: remove_client_context+0x79/0xd0 [ib_core] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&device->client_data_rwsem){++++}: down_read+0x3f/0x160 ib_get_net_dev_by_params+0xd5/0x200 [ib_core] cma_ib_req_handler+0x5f6/0x2090 [rdma_cm] cm_process_work+0x29/0x110 [ib_cm] cm_req_handler+0x10f5/0x1c00 [ib_cm] cm_work_handler+0x54c/0x311d [ib_cm] process_one_work+0x4aa/0xa30 worker_thread+0x62/0x5b0 kthread+0x1ca/0x1f0 ret_from_fork+0x24/0x30 -> #1 ((work_completion)(&(&work->work)->work)){+.+.}: process_one_work+0x45f/0xa30 worker_thread+0x62/0x5b0 kthread+0x1ca/0x1f0 ret_from_fork+0x24/0x30 -> #0 ((wq_completion)ib_cm){+.+.}: lock_acquire+0xc8/0x1d0 flush_workqueue+0x102/0x990 cm_remove_one+0x30e/0x3c0 [ib_cm] remove_client_context+0x94/0xd0 [ib_core] disable_device+0x10a/0x1f0 [ib_core] __ib_unregister_device+0x5a/0xe0 [ib_core] ib_unregister_device+0x21/0x30 [ib_core] mlx5_ib_stage_ib_reg_cleanup+0x9/0x10 [mlx5_ib] __mlx5_ib_remove+0x3d/0x70 [mlx5_ib] mlx5_ib_remove+0x12e/0x140 [mlx5_ib] mlx5_remove_device+0x144/0x150 [mlx5_core] mlx5_unregister_interface+0x3f/0xf0 [mlx5_core] mlx5_ib_cleanup+0x10/0x3a [mlx5_ib] __x64_sys_delete_module+0x227/0x350 do_syscall_64+0xc3/0x6a4 entry_SYSCALL_64_after_hwframe+0x49/0xbe Which is due to the read side of the client_data_rwsem being obtained recursively through a work queue flush during cm client removal. The lock is being held across the remove in remove_client_context() so that the function is a fence, once it returns the client is removed. This is required so that the two callers do not proceed with destruction until the client completes removal. Instead of using client_data_rwsem use the existing device unregistration refcount and add a similar client unregistration (client->uses) refcount. This will fence the two unregistration paths without holding any locks. Cc: Fixes: 921eab1143aa ("RDMA/devices: Re-organize device.c locking") Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20190731081841.32345-2-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/core/device.c | 56 ++++++++++++++++++++++++-------- include/rdma/ib_verbs.h | 3 ++ 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 9773145dee09..d86fbabe48d6 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -99,6 +99,12 @@ static LIST_HEAD(client_list); static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC); static DECLARE_RWSEM(clients_rwsem); +static void ib_client_put(struct ib_client *client) +{ + if (refcount_dec_and_test(&client->uses)) + complete(&client->uses_zero); +} + /* * If client_data is registered then the corresponding client must also still * be registered. @@ -660,6 +666,14 @@ static int add_client_context(struct ib_device *device, return 0; down_write(&device->client_data_rwsem); + /* + * So long as the client is registered hold both the client and device + * unregistration locks. + */ + if (!refcount_inc_not_zero(&client->uses)) + goto out_unlock; + refcount_inc(&device->refcount); + /* * Another caller to add_client_context got here first and has already * completely initialized context. @@ -683,6 +697,9 @@ static int add_client_context(struct ib_device *device, return 0; out: + ib_device_put(device); + ib_client_put(client); +out_unlock: up_write(&device->client_data_rwsem); return ret; } @@ -702,7 +719,7 @@ static void remove_client_context(struct ib_device *device, client_data = xa_load(&device->client_data, client_id); xa_clear_mark(&device->client_data, client_id, CLIENT_DATA_REGISTERED); client = xa_load(&clients, client_id); - downgrade_write(&device->client_data_rwsem); + up_write(&device->client_data_rwsem); /* * Notice we cannot be holding any exclusive locks when calling the @@ -712,17 +729,13 @@ static void remove_client_context(struct ib_device *device, * * For this reason clients and drivers should not call the * unregistration functions will holdling any locks. - * - * It tempting to drop the client_data_rwsem too, but this is required - * to ensure that unregister_client does not return until all clients - * are completely unregistered, which is required to avoid module - * unloading races. */ if (client->remove) client->remove(device, client_data); xa_erase(&device->client_data, client_id); - up_read(&device->client_data_rwsem); + ib_device_put(device); + ib_client_put(client); } static int alloc_port_data(struct ib_device *device) @@ -1705,6 +1718,8 @@ int ib_register_client(struct ib_client *client) unsigned long index; int ret; + refcount_set(&client->uses, 1); + init_completion(&client->uses_zero); ret = assign_client_id(client); if (ret) return ret; @@ -1740,16 +1755,29 @@ void ib_unregister_client(struct ib_client *client) unsigned long index; down_write(&clients_rwsem); + ib_client_put(client); xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED); up_write(&clients_rwsem); - /* - * Every device still known must be serialized to make sure we are - * done with the client callbacks before we return. - */ - down_read(&devices_rwsem); - xa_for_each (&devices, index, device) + + /* We do not want to have locks while calling client->remove() */ + rcu_read_lock(); + xa_for_each (&devices, index, device) { + if (!ib_device_try_get(device)) + continue; + rcu_read_unlock(); + remove_client_context(device, client->client_id); - up_read(&devices_rwsem); + + ib_device_put(device); + rcu_read_lock(); + } + rcu_read_unlock(); + + /* + * remove_client_context() is not a fence, it can return even though a + * removal is ongoing. Wait until all removals are completed. + */ + wait_for_completion(&client->uses_zero); down_write(&clients_rwsem); list_del(&client->list); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c5f8a9f17063..7b80ec822043 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2647,6 +2647,9 @@ struct ib_client { const union ib_gid *gid, const struct sockaddr *addr, void *client_data); + + refcount_t uses; + struct completion uses_zero; struct list_head list; u32 client_id; From 9cd5881719e9555cae300ec8b389eda3c8101339 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 31 Jul 2019 11:18:41 +0300 Subject: [PATCH 229/277] RDMA/devices: Remove the lock around remove_client_context Due to the complexity of client->remove() callbacks it is desirable to not hold any locks while calling them. Remove the last one by tracking only the highest client ID and running backwards from there over the xarray. Since the only purpose of that lock was to protect the linked list, we can drop the lock. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20190731081841.32345-3-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/core/device.c | 48 ++++++++++++++++++-------------- include/rdma/ib_verbs.h | 1 - 2 files changed, 27 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index d86fbabe48d6..ea8661a00651 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -94,7 +94,7 @@ static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC); static DECLARE_RWSEM(devices_rwsem); #define DEVICE_REGISTERED XA_MARK_1 -static LIST_HEAD(client_list); +static u32 highest_client_id; #define CLIENT_REGISTERED XA_MARK_1 static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC); static DECLARE_RWSEM(clients_rwsem); @@ -1237,7 +1237,7 @@ static int setup_device(struct ib_device *device) static void disable_device(struct ib_device *device) { - struct ib_client *client; + u32 cid; WARN_ON(!refcount_read(&device->refcount)); @@ -1245,10 +1245,19 @@ static void disable_device(struct ib_device *device) xa_clear_mark(&devices, device->index, DEVICE_REGISTERED); up_write(&devices_rwsem); + /* + * Remove clients in LIFO order, see assign_client_id. This could be + * more efficient if xarray learns to reverse iterate. Since no new + * clients can be added to this ib_device past this point we only need + * the maximum possible client_id value here. + */ down_read(&clients_rwsem); - list_for_each_entry_reverse(client, &client_list, list) - remove_client_context(device, client->client_id); + cid = highest_client_id; up_read(&clients_rwsem); + while (cid) { + cid--; + remove_client_context(device, cid); + } /* Pairs with refcount_set in enable_device */ ib_device_put(device); @@ -1675,30 +1684,31 @@ static int assign_client_id(struct ib_client *client) /* * The add/remove callbacks must be called in FIFO/LIFO order. To * achieve this we assign client_ids so they are sorted in - * registration order, and retain a linked list we can reverse iterate - * to get the LIFO order. The extra linked list can go away if xarray - * learns to reverse iterate. + * registration order. */ - if (list_empty(&client_list)) { - client->client_id = 0; - } else { - struct ib_client *last; - - last = list_last_entry(&client_list, struct ib_client, list); - client->client_id = last->client_id + 1; - } + client->client_id = highest_client_id; ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL); if (ret) goto out; + highest_client_id++; xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED); - list_add_tail(&client->list, &client_list); out: up_write(&clients_rwsem); return ret; } +static void remove_client_id(struct ib_client *client) +{ + down_write(&clients_rwsem); + xa_erase(&clients, client->client_id); + for (; highest_client_id; highest_client_id--) + if (xa_load(&clients, highest_client_id - 1)) + break; + up_write(&clients_rwsem); +} + /** * ib_register_client - Register an IB client * @client:Client to register @@ -1778,11 +1788,7 @@ void ib_unregister_client(struct ib_client *client) * removal is ongoing. Wait until all removals are completed. */ wait_for_completion(&client->uses_zero); - - down_write(&clients_rwsem); - list_del(&client->list); - xa_erase(&clients, client->client_id); - up_write(&clients_rwsem); + remove_client_id(client); } EXPORT_SYMBOL(ib_unregister_client); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7b80ec822043..4f225175cb91 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2650,7 +2650,6 @@ struct ib_client { refcount_t uses; struct completion uses_zero; - struct list_head list; u32 client_id; /* kverbs are not required by the client */ From e5366d309a772fef264ec85e858f9ea46f939848 Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Wed, 31 Jul 2019 11:19:29 +0300 Subject: [PATCH 230/277] IB/mlx5: Fix MR registration flow to use UMR properly Driver shouldn't allow to use UMR to register a MR when umr_modify_atomic_disabled is set. Otherwise it will always end up with a failure in the post send flow which sets the UMR WQE to modify atomic access right. Fixes: c8d75a980fab ("IB/mlx5: Respect new UMR capabilities") Signed-off-by: Guy Levi Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20190731081929.32559-1-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mr.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 2c77456f359f..b74fad08412f 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -51,22 +51,12 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static int mr_cache_max_order(struct mlx5_ib_dev *dev); static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); -static bool umr_can_modify_entity_size(struct mlx5_ib_dev *dev) -{ - return !MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled); -} static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) { return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled); } -static bool use_umr(struct mlx5_ib_dev *dev, int order) -{ - return order <= mr_cache_max_order(dev) && - umr_can_modify_entity_size(dev); -} - static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); @@ -1271,7 +1261,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_mr *mr = NULL; - bool populate_mtts = false; + bool use_umr; struct ib_umem *umem; int page_shift; int npages; @@ -1303,29 +1293,30 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (err < 0) return ERR_PTR(err); - if (use_umr(dev, order)) { + use_umr = !MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled) && + (!MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled) || + !MLX5_CAP_GEN(dev->mdev, atomic)); + + if (order <= mr_cache_max_order(dev) && use_umr) { mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, page_shift, order, access_flags); if (PTR_ERR(mr) == -EAGAIN) { mlx5_ib_dbg(dev, "cache empty for order %d\n", order); mr = NULL; } - populate_mtts = false; } else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) { if (access_flags & IB_ACCESS_ON_DEMAND) { err = -EINVAL; pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n"); goto error; } - populate_mtts = true; + use_umr = false; } if (!mr) { - if (!umr_can_modify_entity_size(dev)) - populate_mtts = true; mutex_lock(&dev->slow_path_mutex); mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, - page_shift, access_flags, populate_mtts); + page_shift, access_flags, !use_umr); mutex_unlock(&dev->slow_path_mutex); } @@ -1341,7 +1332,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, update_odp_mr(mr); - if (!populate_mtts) { + if (use_umr) { int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; if (access_flags & IB_ACCESS_ON_DEMAND) From 52e0a118a20308dd6aa531e20a5ab5907d2264c8 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 1 Aug 2019 13:43:54 +0300 Subject: [PATCH 231/277] RDMA/restrack: Track driver QP types in resource tracker The check for QP type different than XRC has excluded driver QP types from the resource tracker. As a result, "rdma resource show" user command would not show opened driver QPs which does not reflect the real state of the system. Check QP type explicitly instead of assuming enum values/ordering. Fixes: 40909f664d27 ("RDMA/efa: Add EFA verbs implementation") Signed-off-by: Gal Pressman Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20190801104354.11417-1-galpress@amazon.com Signed-off-by: Doug Ledford --- drivers/infiniband/core/core_priv.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 888d89ce81df..beee7b7e0d9a 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -302,7 +302,9 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, struct ib_udata *udata, struct ib_uobject *uobj) { + enum ib_qp_type qp_type = attr->qp_type; struct ib_qp *qp; + bool is_xrc; if (!dev->ops.create_qp) return ERR_PTR(-EOPNOTSUPP); @@ -320,7 +322,8 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, * and more importantly they are created internaly by driver, * see mlx5 create_dev_resources() as an example. */ - if (attr->qp_type < IB_QPT_XRC_INI) { + is_xrc = qp_type == IB_QPT_XRC_INI || qp_type == IB_QPT_XRC_TGT; + if ((qp_type < IB_QPT_MAX && !is_xrc) || qp_type == IB_QPT_DRIVER) { qp->res.type = RDMA_RESTRACK_QP; if (uobj) rdma_restrack_uadd(&qp->res); From 770b7d96cfff6a8bf6c9f261ba6f135dc9edf484 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 1 Aug 2019 15:14:49 +0300 Subject: [PATCH 232/277] IB/mad: Fix use-after-free in ib mad completion handling We encountered a use-after-free bug when unloading the driver: [ 3562.116059] BUG: KASAN: use-after-free in ib_mad_post_receive_mads+0xddc/0xed0 [ib_core] [ 3562.117233] Read of size 4 at addr ffff8882ca5aa868 by task kworker/u13:2/23862 [ 3562.118385] [ 3562.119519] CPU: 2 PID: 23862 Comm: kworker/u13:2 Tainted: G OE 5.1.0-for-upstream-dbg-2019-05-19_16-44-30-13 #1 [ 3562.121806] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu2 04/01/2014 [ 3562.123075] Workqueue: ib-comp-unb-wq ib_cq_poll_work [ib_core] [ 3562.124383] Call Trace: [ 3562.125640] dump_stack+0x9a/0xeb [ 3562.126911] print_address_description+0xe3/0x2e0 [ 3562.128223] ? ib_mad_post_receive_mads+0xddc/0xed0 [ib_core] [ 3562.129545] __kasan_report+0x15c/0x1df [ 3562.130866] ? ib_mad_post_receive_mads+0xddc/0xed0 [ib_core] [ 3562.132174] kasan_report+0xe/0x20 [ 3562.133514] ib_mad_post_receive_mads+0xddc/0xed0 [ib_core] [ 3562.134835] ? find_mad_agent+0xa00/0xa00 [ib_core] [ 3562.136158] ? qlist_free_all+0x51/0xb0 [ 3562.137498] ? mlx4_ib_sqp_comp_worker+0x1970/0x1970 [mlx4_ib] [ 3562.138833] ? quarantine_reduce+0x1fa/0x270 [ 3562.140171] ? kasan_unpoison_shadow+0x30/0x40 [ 3562.141522] ib_mad_recv_done+0xdf6/0x3000 [ib_core] [ 3562.142880] ? _raw_spin_unlock_irqrestore+0x46/0x70 [ 3562.144277] ? ib_mad_send_done+0x1810/0x1810 [ib_core] [ 3562.145649] ? mlx4_ib_destroy_cq+0x2a0/0x2a0 [mlx4_ib] [ 3562.147008] ? _raw_spin_unlock_irqrestore+0x46/0x70 [ 3562.148380] ? debug_object_deactivate+0x2b9/0x4a0 [ 3562.149814] __ib_process_cq+0xe2/0x1d0 [ib_core] [ 3562.151195] ib_cq_poll_work+0x45/0xf0 [ib_core] [ 3562.152577] process_one_work+0x90c/0x1860 [ 3562.153959] ? pwq_dec_nr_in_flight+0x320/0x320 [ 3562.155320] worker_thread+0x87/0xbb0 [ 3562.156687] ? __kthread_parkme+0xb6/0x180 [ 3562.158058] ? process_one_work+0x1860/0x1860 [ 3562.159429] kthread+0x320/0x3e0 [ 3562.161391] ? kthread_park+0x120/0x120 [ 3562.162744] ret_from_fork+0x24/0x30 ... [ 3562.187615] Freed by task 31682: [ 3562.188602] save_stack+0x19/0x80 [ 3562.189586] __kasan_slab_free+0x11d/0x160 [ 3562.190571] kfree+0xf5/0x2f0 [ 3562.191552] ib_mad_port_close+0x200/0x380 [ib_core] [ 3562.192538] ib_mad_remove_device+0xf0/0x230 [ib_core] [ 3562.193538] remove_client_context+0xa6/0xe0 [ib_core] [ 3562.194514] disable_device+0x14e/0x260 [ib_core] [ 3562.195488] __ib_unregister_device+0x79/0x150 [ib_core] [ 3562.196462] ib_unregister_device+0x21/0x30 [ib_core] [ 3562.197439] mlx4_ib_remove+0x162/0x690 [mlx4_ib] [ 3562.198408] mlx4_remove_device+0x204/0x2c0 [mlx4_core] [ 3562.199381] mlx4_unregister_interface+0x49/0x1d0 [mlx4_core] [ 3562.200356] mlx4_ib_cleanup+0xc/0x1d [mlx4_ib] [ 3562.201329] __x64_sys_delete_module+0x2d2/0x400 [ 3562.202288] do_syscall_64+0x95/0x470 [ 3562.203277] entry_SYSCALL_64_after_hwframe+0x49/0xbe The problem was that the MAD PD was deallocated before the MAD CQ. There was completion work pending for the CQ when the PD got deallocated. When the mad completion handling reached procedure ib_mad_post_receive_mads(), we got a use-after-free bug in the following line of code in that procedure: sg_list.lkey = qp_info->port_priv->pd->local_dma_lkey; (the pd pointer in the above line is no longer valid, because the pd has been deallocated). We fix this by allocating the PD before the CQ in procedure ib_mad_port_open(), and deallocating the PD after freeing the CQ in procedure ib_mad_port_close(). Since the CQ completion work queue is flushed during ib_free_cq(), no completions will be pending for that CQ when the PD is later deallocated. Note that freeing the CQ before deallocating the PD is the practice in the ULPs. Fixes: 4be90bc60df4 ("IB/mad: Remove ib_get_dma_mr calls") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20190801121449.24973-1-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/core/mad.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index cc99479b2c09..9947d16edef2 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -3224,18 +3224,18 @@ static int ib_mad_port_open(struct ib_device *device, if (has_smi) cq_size *= 2; + port_priv->pd = ib_alloc_pd(device, 0); + if (IS_ERR(port_priv->pd)) { + dev_err(&device->dev, "Couldn't create ib_mad PD\n"); + ret = PTR_ERR(port_priv->pd); + goto error3; + } + port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0, IB_POLL_UNBOUND_WORKQUEUE); if (IS_ERR(port_priv->cq)) { dev_err(&device->dev, "Couldn't create ib_mad CQ\n"); ret = PTR_ERR(port_priv->cq); - goto error3; - } - - port_priv->pd = ib_alloc_pd(device, 0); - if (IS_ERR(port_priv->pd)) { - dev_err(&device->dev, "Couldn't create ib_mad PD\n"); - ret = PTR_ERR(port_priv->pd); goto error4; } @@ -3278,11 +3278,11 @@ error8: error7: destroy_mad_qp(&port_priv->qp_info[0]); error6: - ib_dealloc_pd(port_priv->pd); -error4: ib_free_cq(port_priv->cq); cleanup_recv_queue(&port_priv->qp_info[1]); cleanup_recv_queue(&port_priv->qp_info[0]); +error4: + ib_dealloc_pd(port_priv->pd); error3: kfree(port_priv); @@ -3312,8 +3312,8 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) destroy_workqueue(port_priv->wq); destroy_mad_qp(&port_priv->qp_info[1]); destroy_mad_qp(&port_priv->qp_info[0]); - ib_dealloc_pd(port_priv->pd); ib_free_cq(port_priv->cq); + ib_dealloc_pd(port_priv->pd); cleanup_recv_queue(&port_priv->qp_info[1]); cleanup_recv_queue(&port_priv->qp_info[0]); /* XXX: Handle deallocation of MAD registration tables */ From 6497d0a9c53df6e98b25e2b79f2295d7caa47b6e Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 31 Jul 2019 12:54:28 -0500 Subject: [PATCH 233/277] IB/hfi1: Fix Spectre v1 vulnerability sl is controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. Fix this by sanitizing sl before using it to index ibp->sl_to_sc. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://lore.kernel.org/lkml/20180423164740.GY17484@dhcp22.suse.cz/ Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20190731175428.GA16736@embeddedor Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/verbs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index c4b243f50c76..646f61545ed6 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -54,6 +54,7 @@ #include #include #include +#include #include "hfi.h" #include "common.h" @@ -1536,6 +1537,7 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr) sl = rdma_ah_get_sl(ah_attr); if (sl >= ARRAY_SIZE(ibp->sl_to_sc)) return -EINVAL; + sl = array_index_nospec(sl, ARRAY_SIZE(ibp->sl_to_sc)); sc5 = ibp->sl_to_sc[sl]; if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf) From 23eaf3b5c1a755e3193480c76fb29414be648688 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 31 Jul 2019 11:38:52 +0300 Subject: [PATCH 234/277] RDMA/mlx5: Release locks during notifier unregister The below kernel panic was observed when created bond mode LACP with GRE tunnel on top. The reason to it was not released spinlock during mlx5 notify unregsiter sequence. [ 234.562007] BUG: scheduling while atomic: sh/10900/0x00000002 [ 234.563005] Preemption disabled at: [ 234.566864] ------------[ cut here ]------------ [ 234.567120] DEBUG_LOCKS_WARN_ON(val > preempt_count()) [ 234.567139] WARNING: CPU: 16 PID: 10900 at kernel/sched/core.c:3203 preempt_count_sub+0xca/0x170 [ 234.569550] CPU: 16 PID: 10900 Comm: sh Tainted: G W 5.2.0-rc1-for-linust-dbg-2019-05-25_04-57-33-60 #1 [ 234.569886] Hardware name: Dell Inc. PowerEdge R720/0X3D66, BIOS 2.6.1 02/12/2018 [ 234.570183] RIP: 0010:preempt_count_sub+0xca/0x170 [ 234.570404] Code: 03 38 d0 7c 08 84 d2 0f 85 b0 00 00 00 8b 15 dd 02 03 04 85 d2 75 ba 48 c7 c6 00 e1 88 83 48 c7 c7 40 e1 88 83 e8 76 11 f7 ff <0f> 0b 5b c3 65 8b 05 d3 1f d8 7e 84 c0 75 82 e8 62 c3 c3 00 85 c0 [ 234.570911] RSP: 0018:ffff888b94477b08 EFLAGS: 00010286 [ 234.571133] RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000 [ 234.571391] RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000246 [ 234.571648] RBP: ffff888ba5560000 R08: fffffbfff08962d5 R09: fffffbfff08962d5 [ 234.571902] R10: 0000000000000001 R11: fffffbfff08962d4 R12: ffff888bac6e9548 [ 234.572157] R13: ffff888babfaf728 R14: ffff888bac6e9568 R15: ffff888babfaf750 [ 234.572412] FS: 00007fcafa59b740(0000) GS:ffff888bed200000(0000) knlGS:0000000000000000 [ 234.572686] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 234.572914] CR2: 00007f984f16b140 CR3: 0000000b2bf0a001 CR4: 00000000001606e0 [ 234.573172] Call Trace: [ 234.573336] _raw_spin_unlock+0x2e/0x50 [ 234.573542] mlx5_ib_unbind_slave_port+0x1bc/0x690 [mlx5_ib] [ 234.573793] mlx5_ib_cleanup_multiport_master+0x1d3/0x660 [mlx5_ib] [ 234.574039] mlx5_ib_stage_init_cleanup+0x4c/0x360 [mlx5_ib] [ 234.574271] ? kfree+0xf5/0x2f0 [ 234.574465] __mlx5_ib_remove+0x61/0xd0 [mlx5_ib] [ 234.574688] ? __mlx5_ib_remove+0xd0/0xd0 [mlx5_ib] [ 234.574951] mlx5_remove_device+0x234/0x300 [mlx5_core] [ 234.575224] mlx5_unregister_device+0x4d/0x1e0 [mlx5_core] [ 234.575493] remove_one+0x4f/0x160 [mlx5_core] [ 234.575704] pci_device_remove+0xef/0x2a0 [ 234.581407] ? pcibios_free_irq+0x10/0x10 [ 234.587143] ? up_read+0xc1/0x260 [ 234.592785] device_release_driver_internal+0x1ab/0x430 [ 234.598442] unbind_store+0x152/0x200 [ 234.604064] ? sysfs_kf_write+0x3b/0x180 [ 234.609441] ? sysfs_file_ops+0x160/0x160 [ 234.615021] kernfs_fop_write+0x277/0x440 [ 234.620288] ? __sb_start_write+0x1ef/0x2c0 [ 234.625512] vfs_write+0x15e/0x460 [ 234.630786] ksys_write+0x156/0x1e0 [ 234.635988] ? __ia32_sys_read+0xb0/0xb0 [ 234.641120] ? trace_hardirqs_off_thunk+0x1a/0x1c [ 234.646163] do_syscall_64+0x95/0x470 [ 234.651106] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 234.656004] RIP: 0033:0x7fcaf9c9cfd0 [ 234.660686] Code: 73 01 c3 48 8b 0d c0 6e 2d 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d cd cf 2d 00 00 75 10 b8 01 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 ee cb 01 00 48 89 04 24 [ 234.670128] RSP: 002b:00007ffd3b01ddd8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 234.674811] RAX: ffffffffffffffda RBX: 000000000000000d RCX: 00007fcaf9c9cfd0 [ 234.679387] RDX: 000000000000000d RSI: 00007fcafa5c1000 RDI: 0000000000000001 [ 234.683848] RBP: 00007fcafa5c1000 R08: 000000000000000a R09: 00007fcafa59b740 [ 234.688167] R10: 00007ffd3b01d8e0 R11: 0000000000000246 R12: 00007fcaf9f75400 [ 234.692386] R13: 000000000000000d R14: 0000000000000001 R15: 0000000000000000 [ 234.696495] irq event stamp: 153067 [ 234.700525] hardirqs last enabled at (153067): [] _raw_spin_unlock_irqrestore+0x59/0x70 [ 234.704665] hardirqs last disabled at (153066): [] _raw_spin_lock_irqsave+0x22/0x90 [ 234.708722] softirqs last enabled at (153058): [] __do_softirq+0x6c5/0xb4e [ 234.712673] softirqs last disabled at (153051): [] irq_exit+0x17d/0x1d0 [ 234.716601] ---[ end trace 5dbf096843ee9ce6 ]--- Fixes: df097a278c75 ("IB/mlx5: Use the new mlx5 core notifier API") Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20190731083852.584-1-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index c2a5780cb394..e12a4404096b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5802,13 +5802,12 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, return; } - if (mpi->mdev_events.notifier_call) - mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events); - mpi->mdev_events.notifier_call = NULL; - mpi->ibdev = NULL; spin_unlock(&port->mp.mpi_lock); + if (mpi->mdev_events.notifier_call) + mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events); + mpi->mdev_events.notifier_call = NULL; mlx5_remove_netdev_notifier(ibdev, port_num); spin_lock(&port->mp.mpi_lock); From 9ca7ad6c7706edeae331c1632d0c63897418ebad Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Wed, 26 Jun 2019 11:00:15 -0700 Subject: [PATCH 235/277] drm: msm: Fix add_gpu_components add_gpu_components() adds found GPU nodes from the DT to the match list, regardless of the status of the nodes. This is a problem, because if the nodes are disabled, they should not be on the match list because they will not be matched. This prevents display from initing if a GPU node is defined, but it's status is disabled. Fix this by checking the node's status before adding it to the match list. Fixes: dc3ea265b856 (drm/msm: Drop the gpu binding) Reviewed-by: Rob Clark Signed-off-by: Jeffrey Hugo Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20190626180015.45242-1-jeffrey.l.hugo@gmail.com --- drivers/gpu/drm/msm/msm_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index c226156f2dea..c356f5ccf253 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -1279,7 +1279,8 @@ static int add_gpu_components(struct device *dev, if (!np) return 0; - drm_of_component_match_add(dev, matchptr, compare_of, np); + if (of_device_is_available(np)) + drm_of_component_match_add(dev, matchptr, compare_of, np); of_node_put(np); From 020fb3bebc224dfe9353a56ecbe2d5fac499dffc Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 1 Aug 2019 01:27:25 +0000 Subject: [PATCH 236/277] RDMA/hns: Fix error return code in hns_roce_v1_rsv_lp_qp() Fix to return error code -ENOMEM from the rdma_zalloc_drv_obj() error handling case instead of 0, as done elsewhere in this function. Fixes: e8ac9389f0d7 ("RDMA: Fix allocation failure on pointer pd") Fixes: 21a428a019c9 ("RDMA: Handle PD allocations by IB/core") Signed-off-by: Wei Yongjun Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20190801012725.150493-1-weiyongjun1@huawei.com Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 81e6dedb1e02..c07e387a07a3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -750,8 +750,10 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) atomic_set(&free_mr->mr_free_cq->ib_cq.usecnt, 0); pd = rdma_zalloc_drv_obj(ibdev, ib_pd); - if (!pd) + if (!pd) { + ret = -ENOMEM; goto alloc_mem_failed; + } pd->device = ibdev; ret = hns_roce_alloc_pd(pd, NULL); From fc5b34a35458314df1dd00281f6e41f419581aa9 Mon Sep 17 00:00:00 2001 From: Micah Morton Date: Thu, 1 Aug 2019 10:28:27 -0700 Subject: [PATCH 237/277] Add entry in MAINTAINERS file for SafeSetID LSM This LSM was added in v5.1 and needs an entry in the MAINTAINERS file. Signed-off-by: Micah Morton Acked-by: James Morris --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 6426db5198f0..30b8a83c3afa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14016,6 +14016,12 @@ F: drivers/media/common/saa7146/ F: drivers/media/pci/saa7146/ F: include/media/drv-intf/saa7146* +SAFESETID SECURITY MODULE +M: Micah Morton +S: Supported +F: security/safesetid/ +F: Documentation/admin-guide/LSM/SafeSetID.rst + SAMSUNG AUDIO (ASoC) DRIVERS M: Krzysztof Kozlowski M: Sangbeom Kim From 0eb6ddfb865c7d7ec05cb330050671363515e67f Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 1 Aug 2019 19:21:51 +0900 Subject: [PATCH 238/277] block: Fix __blkdev_direct_IO() for bio fragments The recent fix to properly handle IOCB_NOWAIT for async O_DIRECT IO (patch 6a43074e2f46) introduced two problems with BIO fragment handling for direct IOs: 1) The dio size processed is calculated by incrementing the ret variable by the size of the bio fragment issued for the dio. However, this size is obtained directly from bio->bi_iter.bi_size AFTER the bio submission which may result in referencing the bi_size value after the bio completed, resulting in an incorrect value use. 2) The ret variable is not incremented by the size of the last bio fragment issued for the bio, leading to an invalid IO size being returned to the user. Fix both problem by using dio->size (which is incremented before the bio submission) to update the value of ret after bio submissions, including for the last bio fragment issued. Fixes: 6a43074e2f46 ("block: properly handle IOCB_NOWAIT for async O_DIRECT IO") Reported-by: Masato Suzuki Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- fs/block_dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 22591bad9353..a6f7c892cb4a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -439,6 +439,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) ret = -EAGAIN; goto error; } + ret = dio->size; if (polled) WRITE_ONCE(iocb->ki_cookie, qc); @@ -465,7 +466,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) ret = -EAGAIN; goto error; } - ret += bio->bi_iter.bi_size; + ret = dio->size; bio = bio_alloc(gfp, nr_pages); if (!bio) { From d12e3aae160fb26b534c4496b211d6e60a5179ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Mon, 22 Jul 2019 20:55:27 +0200 Subject: [PATCH 239/277] i2c: at91: disable TXRDY interrupt after sending data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Driver was not disabling TXRDY interrupt after last TX byte. This caused interrupt storm until transfer timeouts for slow or broken device on the bus. The patch fixes the interrupt storm on my SAMA5D2-based board. Cc: stable@vger.kernel.org # 5.2.x [v5.2 introduced file split; the patch should apply to i2c-at91.c before the split] Fixes: fac368a04048 ("i2c: at91: add new driver") Signed-off-by: Michał Mirosław Acked-by: Ludovic Desroches Tested-by: Raag Jadav Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-at91-master.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-at91-master.c b/drivers/i2c/busses/i2c-at91-master.c index e87232f2e708..a3fcc35ffd3b 100644 --- a/drivers/i2c/busses/i2c-at91-master.c +++ b/drivers/i2c/busses/i2c-at91-master.c @@ -122,9 +122,11 @@ static void at91_twi_write_next_byte(struct at91_twi_dev *dev) writeb_relaxed(*dev->buf, dev->base + AT91_TWI_THR); /* send stop when last byte has been written */ - if (--dev->buf_len == 0) + if (--dev->buf_len == 0) { if (!dev->use_alt_cmd) at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP); + at91_twi_write(dev, AT91_TWI_IDR, AT91_TWI_TXRDY); + } dev_dbg(dev->dev, "wrote 0x%x, to go %zu\n", *dev->buf, dev->buf_len); @@ -542,9 +544,8 @@ static int at91_do_twi_transfer(struct at91_twi_dev *dev) } else { at91_twi_write_next_byte(dev); at91_twi_write(dev, AT91_TWI_IER, - AT91_TWI_TXCOMP | - AT91_TWI_NACK | - AT91_TWI_TXRDY); + AT91_TWI_TXCOMP | AT91_TWI_NACK | + (dev->buf_len ? AT91_TWI_TXRDY : 0)); } } From b1ac6704493fa14b5dc19eb6b69a73932361a131 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Mon, 22 Jul 2019 21:05:56 +0200 Subject: [PATCH 240/277] i2c: at91: fix clk_offset for sama5d2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In SAMA5D2 datasheet, TWIHS_CWGR register rescription mentions clock offset of 3 cycles (compared to 4 in eg. SAMA5D3). Cc: stable@vger.kernel.org # 5.2.x [needs applying to i2c-at91.c instead for earlier kernels] Fixes: 0ef6f3213dac ("i2c: at91: add support for new alternative command mode") Signed-off-by: Michał Mirosław Acked-by: Ludovic Desroches Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-at91-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-at91-core.c b/drivers/i2c/busses/i2c-at91-core.c index 8d55cdd69ff4..435c7d7377a3 100644 --- a/drivers/i2c/busses/i2c-at91-core.c +++ b/drivers/i2c/busses/i2c-at91-core.c @@ -142,7 +142,7 @@ static struct at91_twi_pdata sama5d4_config = { static struct at91_twi_pdata sama5d2_config = { .clk_max_div = 7, - .clk_offset = 4, + .clk_offset = 3, .has_unre_flag = true, .has_alt_cmd = true, .has_hold_field = true, From 8eb9a2dff019055e4ff307bb7f8c64a7a20e79c8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sun, 28 Jul 2019 18:51:38 -0500 Subject: [PATCH 241/277] i2c: s3c2410: Mark expected switch fall-through Mark switch cases where we are expecting to fall through. This patch fixes the following warning: drivers/i2c/busses/i2c-s3c2410.c: In function 'i2c_s3c_irq_nextbyte': drivers/i2c/busses/i2c-s3c2410.c:431:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if (i2c->state == STATE_READ) ^ drivers/i2c/busses/i2c-s3c2410.c:439:2: note: here case STATE_WRITE: ^~~~ Notice that, in this particular case, the code comment is modified in accordance with what GCC is expecting to find. Reported-by: Stephen Rothwell Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-s3c2410.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c index d97fb857b0ea..c98ef4c4a0c9 100644 --- a/drivers/i2c/busses/i2c-s3c2410.c +++ b/drivers/i2c/busses/i2c-s3c2410.c @@ -435,6 +435,7 @@ static int i2c_s3c_irq_nextbyte(struct s3c24xx_i2c *i2c, unsigned long iicstat) * fall through to the write state, as we will need to * send a byte as well */ + /* Fall through */ case STATE_WRITE: /* From 42787ed79638dc7f0f8d5c164caba1e87bfab50f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 1 Aug 2019 01:31:08 +0200 Subject: [PATCH 242/277] ACPI: PM: Fix regression in acpi_device_set_power() Commit f850a48a0799 ("ACPI: PM: Allow transitions to D0 to occur in special cases") overlooked the fact that acpi_power_transition() may change the power.state value for the target device and if that happens, it may confuse acpi_device_set_power() and cause it to omit the _PS0 evaluation which on some systems is necessary to change power states of devices from low-power to D0. Fix that by saving the current value of power.state for the target device before passing it to acpi_power_transition() and using the saved value in a subsequent check. Fixes: f850a48a0799 ("ACPI: PM: Allow transitions to D0 to occur in special cases") Reported-by: Kai-Heng Feng Reported-by: Mario Limonciello Signed-off-by: Rafael J. Wysocki Tested-by: Kai-Heng Feng Tested-by: Mario Limonciello --- drivers/acpi/device_pm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 28cffaaf9d82..f616b16c1f0b 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -232,13 +232,15 @@ int acpi_device_set_power(struct acpi_device *device, int state) if (device->power.flags.power_resources) result = acpi_power_transition(device, target_state); } else { + int cur_state = device->power.state; + if (device->power.flags.power_resources) { result = acpi_power_transition(device, ACPI_STATE_D0); if (result) goto end; } - if (device->power.state == ACPI_STATE_D0) { + if (cur_state == ACPI_STATE_D0) { int psc; /* Nothing to do here if _PSC is not present. */ From 412e85b605315fd129a849599cf4a5a7959573a8 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 1 Aug 2019 18:02:15 -0400 Subject: [PATCH 243/277] drm/nouveau: Only release VCPI slots on mode changes Looks like a regression got introduced into nv50_mstc_atomic_check() that somehow didn't get found until now. If userspace changes crtc_state->active to false but leaves the CRTC enabled, we end up calling drm_dp_atomic_find_vcpi_slots() using the PBN calculated in asyh->dp.pbn. However, if the display is inactive we end up calculating a PBN of 0, which inadvertently causes us to have an allocation of 0. >From there, if userspace then disables the CRTC afterwards we end up accidentally attempting to free the VCPI twice: WARNING: CPU: 0 PID: 1484 at drivers/gpu/drm/drm_dp_mst_topology.c:3336 drm_dp_atomic_release_vcpi_slots+0x87/0xb0 [drm_kms_helper] RIP: 0010:drm_dp_atomic_release_vcpi_slots+0x87/0xb0 [drm_kms_helper] Call Trace: drm_atomic_helper_check_modeset+0x3f3/0xa60 [drm_kms_helper] ? drm_atomic_check_only+0x43/0x780 [drm] drm_atomic_helper_check+0x15/0x90 [drm_kms_helper] nv50_disp_atomic_check+0x83/0x1d0 [nouveau] drm_atomic_check_only+0x54d/0x780 [drm] ? drm_atomic_set_crtc_for_connector+0xec/0x100 [drm] drm_atomic_commit+0x13/0x50 [drm] drm_atomic_helper_set_config+0x81/0x90 [drm_kms_helper] drm_mode_setcrtc+0x194/0x6a0 [drm] ? vprintk_emit+0x16a/0x230 ? drm_ioctl+0x163/0x390 [drm] ? drm_mode_getcrtc+0x180/0x180 [drm] drm_ioctl_kernel+0xaa/0xf0 [drm] drm_ioctl+0x208/0x390 [drm] ? drm_mode_getcrtc+0x180/0x180 [drm] nouveau_drm_ioctl+0x63/0xb0 [nouveau] do_vfs_ioctl+0x405/0x660 ? recalc_sigpending+0x17/0x50 ? _copy_from_user+0x37/0x60 ksys_ioctl+0x5e/0x90 ? exit_to_usermode_loop+0x92/0xe0 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x59/0x190 entry_SYSCALL_64_after_hwframe+0x44/0xa9 WARNING: CPU: 0 PID: 1484 at drivers/gpu/drm/drm_dp_mst_topology.c:3336 drm_dp_atomic_release_vcpi_slots+0x87/0xb0 [drm_kms_helper] ---[ end trace 4c395c0c51b1f88d ]--- [drm:drm_dp_atomic_release_vcpi_slots [drm_kms_helper]] *ERROR* no VCPI for [MST PORT:00000000e288eb7d] found in mst state 000000008e642070 So, fix this by doing what we probably should have done from the start: only call drm_dp_atomic_find_vcpi_slots() when crtc_state->mode_changed is set, so that VCPI allocations remain for as long as the CRTC is enabled. Signed-off-by: Lyude Paul Fixes: 232c9eec417a ("drm/nouveau: Use atomic VCPI helpers for MST") Cc: Lyude Paul Cc: Ben Skeggs Cc: Daniel Vetter Cc: David Airlie Cc: Jerry Zuo Cc: Harry Wentland Cc: Juston Li Cc: Karol Herbst Cc: Laurent Pinchart Cc: Ilia Mirkin Cc: # v5.1+ Acked-by: Ben Skeggs Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20190801220216.15323-1-lyude@redhat.com --- drivers/gpu/drm/nouveau/dispnv50/disp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 8497768f1b41..126703816794 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -780,7 +780,7 @@ nv50_msto_atomic_check(struct drm_encoder *encoder, drm_dp_calc_pbn_mode(crtc_state->adjusted_mode.clock, connector->display_info.bpc * 3); - if (drm_atomic_crtc_needs_modeset(crtc_state)) { + if (crtc_state->mode_changed) { slots = drm_dp_atomic_find_vcpi_slots(state, &mstm->mgr, mstc->port, asyh->dp.pbn); From 41995342b40c418a47603e1321256d2c4a2ed0fb Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Thu, 1 Aug 2019 13:06:30 +0200 Subject: [PATCH 244/277] s390/dasd: fix endless loop after read unit address configuration After getting a storage server event that causes the DASD device driver to update its unit address configuration during a device shutdown there is the possibility of an endless loop in the device driver. In the system log there will be ongoing DASD error messages with RC: -19. The reason is that the loop starting the ruac request only terminates when the retry counter is decreased to 0. But in the sleep_on function there are early exit paths that do not decrease the retry counter. Prevent an endless loop by handling those cases separately. Remove the unnecessary do..while loop since the sleep_on function takes care of retries by itself. Fixes: 8e09f21574ea ("[S390] dasd: add hyper PAV support to DASD device driver, part 1") Cc: stable@vger.kernel.org # 2.6.25+ Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_alias.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index b9ce93e9df89..99f86612f775 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -383,6 +383,20 @@ suborder_not_supported(struct dasd_ccw_req *cqr) char msg_format; char msg_no; + /* + * intrc values ENODEV, ENOLINK and EPERM + * will be optained from sleep_on to indicate that no + * IO operation can be started + */ + if (cqr->intrc == -ENODEV) + return 1; + + if (cqr->intrc == -ENOLINK) + return 1; + + if (cqr->intrc == -EPERM) + return 1; + sense = dasd_get_sense(&cqr->irb); if (!sense) return 0; @@ -447,12 +461,8 @@ static int read_unit_address_configuration(struct dasd_device *device, lcu->flags &= ~NEED_UAC_UPDATE; spin_unlock_irqrestore(&lcu->lock, flags); - do { - rc = dasd_sleep_on(cqr); - if (rc && suborder_not_supported(cqr)) - return -EOPNOTSUPP; - } while (rc && (cqr->retries > 0)); - if (rc) { + rc = dasd_sleep_on(cqr); + if (rc && !suborder_not_supported(cqr)) { spin_lock_irqsave(&lcu->lock, flags); lcu->flags |= NEED_UAC_UPDATE; spin_unlock_irqrestore(&lcu->lock, flags); From 9eae7c3bcb52ec0a9f816d830e232e36a20b46d4 Mon Sep 17 00:00:00 2001 From: Fuqian Huang Date: Thu, 4 Jul 2019 10:34:36 +0800 Subject: [PATCH 245/277] drm/exynos: using dev_get_drvdata directly Several drivers cast a struct device pointer to a struct platform_device pointer only to then call platform_get_drvdata(). To improve readability, these constructs can be simplified by using dev_get_drvdata() directly. Signed-off-by: Fuqian Huang Reviewed-by: Emil Velikov Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_fimc.c | 2 +- drivers/gpu/drm/exynos/exynos_drm_gsc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c index a594ab7be2c0..164d914cbe9a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c @@ -44,7 +44,7 @@ static unsigned int fimc_mask = 0xc; module_param_named(fimc_devs, fimc_mask, uint, 0644); MODULE_PARM_DESC(fimc_devs, "Alias mask for assigning FIMC devices to Exynos DRM"); -#define get_fimc_context(dev) platform_get_drvdata(to_platform_device(dev)) +#define get_fimc_context(dev) dev_get_drvdata(dev) enum { FIMC_CLK_LCLK, diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 1e4b21c49a06..1c524db9570f 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -58,7 +58,7 @@ #define GSC_COEF_DEPTH 3 #define GSC_AUTOSUSPEND_DELAY 2000 -#define get_gsc_context(dev) platform_get_drvdata(to_platform_device(dev)) +#define get_gsc_context(dev) dev_get_drvdata(dev) #define gsc_read(offset) readl(ctx->regs + (offset)) #define gsc_write(cfg, offset) writel(cfg, ctx->regs + (offset)) From 59d431746f1b3c76fd551b71241d7fdce38a58e9 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 9 Jul 2019 21:01:14 +0900 Subject: [PATCH 246/277] drm/exynos: remove redundant assignment to pointer 'node' The pointer 'node' is being assigned with a value that is never read and is re-assigned later. The assignment is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_g2d.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 50904eee96f7..2a3382d43bc9 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -267,7 +267,7 @@ static inline void g2d_hw_reset(struct g2d_data *g2d) static int g2d_init_cmdlist(struct g2d_data *g2d) { struct device *dev = g2d->dev; - struct g2d_cmdlist_node *node = g2d->cmdlist_node; + struct g2d_cmdlist_node *node; int nr; int ret; struct g2d_buf_info *buf_info; From d6f25bd9d4079165ea90f12d71e06d1dca83cd86 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 9 Jul 2019 21:08:48 +0900 Subject: [PATCH 247/277] drm/exynos: add CONFIG_MMU dependency Compile-testing this driver on a NOMMU configuration shows a link failure: drivers/gpu/drm/exynos/exynos_drm_gem.o: In function `exynos_drm_gem_fault': exynos_drm_gem.c:(.text+0x484): undefined reference to `vmf_insert_mixed' Add a CONFIG_MMU dependency to ensure we only enable this in configurations that build correctly. Many other drm drivers have the same dependency. It would be nice to make this work in MMU-less configurations, but evidently nobody has ever needed this so far. Fixes: 156bdac99061 ("drm/exynos: trigger build of all modules") Signed-off-by: Arnd Bergmann Reviewed-by: Vladimir Murzin Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig index 60ce4a8ad9e1..6f7d3b3b3628 100644 --- a/drivers/gpu/drm/exynos/Kconfig +++ b/drivers/gpu/drm/exynos/Kconfig @@ -2,6 +2,7 @@ config DRM_EXYNOS tristate "DRM Support for Samsung SoC EXYNOS Series" depends on OF && DRM && (ARCH_S3C64XX || ARCH_S5PV210 || ARCH_EXYNOS || ARCH_MULTIPLATFORM || COMPILE_TEST) + depends on MMU select DRM_KMS_HELPER select VIDEOMODE_HELPERS select SND_SOC_HDMI_CODEC if SND_SOC From 1bbbab097a05276e312dd2462791d32b21ceb1ee Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 22 Jul 2019 23:25:35 +0100 Subject: [PATCH 248/277] drm/exynos: fix missing decrement of retry counter Currently the retry counter is not being decremented, leading to a potential infinite spin if the scalar_reads don't change state. Addresses-Coverity: ("Infinite loop") Fixes: 280e54c9f614 ("drm/exynos: scaler: Reset hardware before starting the operation") Signed-off-by: Colin Ian King Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_scaler.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c index 9af096479e1c..b24ba948b725 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_scaler.c +++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c @@ -94,12 +94,12 @@ static inline int scaler_reset(struct scaler_context *scaler) scaler_write(SCALER_CFG_SOFT_RESET, SCALER_CFG); do { cpu_relax(); - } while (retry > 1 && + } while (--retry > 1 && scaler_read(SCALER_CFG) & SCALER_CFG_SOFT_RESET); do { cpu_relax(); scaler_write(1, SCALER_INT_EN); - } while (retry > 0 && scaler_read(SCALER_INT_EN) != 1); + } while (--retry > 0 && scaler_read(SCALER_INT_EN) != 1); return retry ? 0 : -EIO; } From b3980e48528c4d2a9e70b145a5bba328b73a0f93 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 1 Aug 2019 23:25:49 +0900 Subject: [PATCH 249/277] arm64: kprobes: Recover pstate.D in single-step exception handler kprobes manipulates the interrupted PSTATE for single step, and doesn't restore it. Thus, if we put a kprobe where the pstate.D (debug) masked, the mask will be cleared after the kprobe hits. Moreover, in the most complicated case, this can lead a kernel crash with below message when a nested kprobe hits. [ 152.118921] Unexpected kernel single-step exception at EL1 When the 1st kprobe hits, do_debug_exception() will be called. At this point, debug exception (= pstate.D) must be masked (=1). But if another kprobes hits before single-step of the first kprobe (e.g. inside user pre_handler), it unmask the debug exception (pstate.D = 0) and return. Then, when the 1st kprobe setting up single-step, it saves current DAIF, mask DAIF, enable single-step, and restore DAIF. However, since "D" flag in DAIF is cleared by the 2nd kprobe, the single-step exception happens soon after restoring DAIF. This has been introduced by commit 7419333fa15e ("arm64: kprobe: Always clear pstate.D in breakpoint exception handler") To solve this issue, this stores all DAIF bits and restore it after single stepping. Reported-by: Naresh Kamboju Fixes: 7419333fa15e ("arm64: kprobe: Always clear pstate.D in breakpoint exception handler") Reviewed-by: James Morse Tested-by: James Morse Signed-off-by: Masami Hiramatsu Signed-off-by: Will Deacon --- arch/arm64/include/asm/daifflags.h | 2 ++ arch/arm64/kernel/probes/kprobes.c | 40 +++++------------------------- 2 files changed, 8 insertions(+), 34 deletions(-) diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 987926ed535e..063c964af705 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -13,6 +13,8 @@ #define DAIF_PROCCTX 0 #define DAIF_PROCCTX_NOIRQ PSR_I_BIT #define DAIF_ERRCTX (PSR_I_BIT | PSR_A_BIT) +#define DAIF_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) + /* mask/save/unmask/restore all exceptions, including interrupts. */ static inline void local_daif_mask(void) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index bd5dfffca272..c4452827419b 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -167,33 +168,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p) __this_cpu_write(current_kprobe, p); } -/* - * When PSTATE.D is set (masked), then software step exceptions can not be - * generated. - * SPSR's D bit shows the value of PSTATE.D immediately before the - * exception was taken. PSTATE.D is set while entering into any exception - * mode, however software clears it for any normal (none-debug-exception) - * mode in the exception entry. Therefore, when we are entering into kprobe - * breakpoint handler from any normal mode then SPSR.D bit is already - * cleared, however it is set when we are entering from any debug exception - * mode. - * Since we always need to generate single step exception after a kprobe - * breakpoint exception therefore we need to clear it unconditionally, when - * we become sure that the current breakpoint exception is for kprobe. - */ -static void __kprobes -spsr_set_debug_flag(struct pt_regs *regs, int mask) -{ - unsigned long spsr = regs->pstate; - - if (mask) - spsr |= PSR_D_BIT; - else - spsr &= ~PSR_D_BIT; - - regs->pstate = spsr; -} - /* * Interrupts need to be disabled before single-step mode is set, and not * reenabled until after single-step mode ends. @@ -205,17 +179,17 @@ spsr_set_debug_flag(struct pt_regs *regs, int mask) static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb, struct pt_regs *regs) { - kcb->saved_irqflag = regs->pstate; + kcb->saved_irqflag = regs->pstate & DAIF_MASK; regs->pstate |= PSR_I_BIT; + /* Unmask PSTATE.D for enabling software step exceptions. */ + regs->pstate &= ~PSR_D_BIT; } static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb, struct pt_regs *regs) { - if (kcb->saved_irqflag & PSR_I_BIT) - regs->pstate |= PSR_I_BIT; - else - regs->pstate &= ~PSR_I_BIT; + regs->pstate &= ~DAIF_MASK; + regs->pstate |= kcb->saved_irqflag; } static void __kprobes @@ -252,8 +226,6 @@ static void __kprobes setup_singlestep(struct kprobe *p, set_ss_context(kcb, slot); /* mark pending ss */ - spsr_set_debug_flag(regs, 0); - /* IRQs and single stepping do not mix well. */ kprobes_save_local_irqflag(kcb, regs); kernel_enable_single_step(regs); From d8bb6718c4db9bcd075dde7ff55d46091ccfae15 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 1 Aug 2019 23:36:14 +0900 Subject: [PATCH 250/277] arm64: Make debug exception handlers visible from RCU Make debug exceptions visible from RCU so that synchronize_rcu() correctly track the debug exception handler. This also introduces sanity checks for user-mode exceptions as same as x86's ist_enter()/ist_exit(). The debug exception can interrupt in idle task. For example, it warns if we put a kprobe on a function called from idle task as below. The warning message showed that the rcu_read_lock() caused this problem. But actually, this means the RCU is lost the context which is already in NMI/IRQ. /sys/kernel/debug/tracing # echo p default_idle_call >> kprobe_events /sys/kernel/debug/tracing # echo 1 > events/kprobes/enable /sys/kernel/debug/tracing # [ 135.122237] [ 135.125035] ============================= [ 135.125310] WARNING: suspicious RCU usage [ 135.125581] 5.2.0-08445-g9187c508bdc7 #20 Not tainted [ 135.125904] ----------------------------- [ 135.126205] include/linux/rcupdate.h:594 rcu_read_lock() used illegally while idle! [ 135.126839] [ 135.126839] other info that might help us debug this: [ 135.126839] [ 135.127410] [ 135.127410] RCU used illegally from idle CPU! [ 135.127410] rcu_scheduler_active = 2, debug_locks = 1 [ 135.128114] RCU used illegally from extended quiescent state! [ 135.128555] 1 lock held by swapper/0/0: [ 135.128944] #0: (____ptrval____) (rcu_read_lock){....}, at: call_break_hook+0x0/0x178 [ 135.130499] [ 135.130499] stack backtrace: [ 135.131192] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.2.0-08445-g9187c508bdc7 #20 [ 135.131841] Hardware name: linux,dummy-virt (DT) [ 135.132224] Call trace: [ 135.132491] dump_backtrace+0x0/0x140 [ 135.132806] show_stack+0x24/0x30 [ 135.133133] dump_stack+0xc4/0x10c [ 135.133726] lockdep_rcu_suspicious+0xf8/0x108 [ 135.134171] call_break_hook+0x170/0x178 [ 135.134486] brk_handler+0x28/0x68 [ 135.134792] do_debug_exception+0x90/0x150 [ 135.135051] el1_dbg+0x18/0x8c [ 135.135260] default_idle_call+0x0/0x44 [ 135.135516] cpu_startup_entry+0x2c/0x30 [ 135.135815] rest_init+0x1b0/0x280 [ 135.136044] arch_call_rest_init+0x14/0x1c [ 135.136305] start_kernel+0x4d4/0x500 [ 135.136597] So make debug exception visible to RCU can fix this warning. Reported-by: Naresh Kamboju Acked-by: Paul E. McKenney Signed-off-by: Masami Hiramatsu Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 57 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 8 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 9568c116ac7f..cfd65b63f36f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -777,6 +777,53 @@ void __init hook_debug_fault_code(int nr, debug_fault_info[nr].name = name; } +/* + * In debug exception context, we explicitly disable preemption despite + * having interrupts disabled. + * This serves two purposes: it makes it much less likely that we would + * accidentally schedule in exception context and it will force a warning + * if we somehow manage to schedule by accident. + */ +static void debug_exception_enter(struct pt_regs *regs) +{ + /* + * Tell lockdep we disabled irqs in entry.S. Do nothing if they were + * already disabled to preserve the last enabled/disabled addresses. + */ + if (interrupts_enabled(regs)) + trace_hardirqs_off(); + + if (user_mode(regs)) { + RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); + } else { + /* + * We might have interrupted pretty much anything. In + * fact, if we're a debug exception, we can even interrupt + * NMI processing. We don't want this code makes in_nmi() + * to return true, but we need to notify RCU. + */ + rcu_nmi_enter(); + } + + preempt_disable(); + + /* This code is a bit fragile. Test it. */ + RCU_LOCKDEP_WARN(!rcu_is_watching(), "exception_enter didn't work"); +} +NOKPROBE_SYMBOL(debug_exception_enter); + +static void debug_exception_exit(struct pt_regs *regs) +{ + preempt_enable_no_resched(); + + if (!user_mode(regs)) + rcu_nmi_exit(); + + if (interrupts_enabled(regs)) + trace_hardirqs_on(); +} +NOKPROBE_SYMBOL(debug_exception_exit); + #ifdef CONFIG_ARM64_ERRATUM_1463225 DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa); @@ -817,12 +864,7 @@ asmlinkage void __exception do_debug_exception(unsigned long addr_if_watchpoint, if (cortex_a76_erratum_1463225_debug_handler(regs)) return; - /* - * Tell lockdep we disabled irqs in entry.S. Do nothing if they were - * already disabled to preserve the last enabled/disabled addresses. - */ - if (interrupts_enabled(regs)) - trace_hardirqs_off(); + debug_exception_enter(regs); if (user_mode(regs) && !is_ttbr0_addr(pc)) arm64_apply_bp_hardening(); @@ -832,7 +874,6 @@ asmlinkage void __exception do_debug_exception(unsigned long addr_if_watchpoint, inf->sig, inf->code, (void __user *)pc, esr); } - if (interrupts_enabled(regs)) - trace_hardirqs_on(); + debug_exception_exit(regs); } NOKPROBE_SYMBOL(do_debug_exception); From 3cdd98606750a5a1d1c8bcda5b481cb86ed67b3b Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 29 Jul 2019 01:23:46 +0200 Subject: [PATCH 251/277] s390/zcrypt: adjust switch fall through comments for -Wimplicit-fallthrough Silence the following warnings when built with -Wimplicit-fallthrough=3 enabled by default since 5.3-rc2: In file included from ./include/linux/preempt.h:11, from ./include/linux/spinlock.h:51, from ./include/linux/mmzone.h:8, from ./include/linux/gfp.h:6, from ./include/linux/slab.h:15, from drivers/s390/crypto/ap_queue.c:13: drivers/s390/crypto/ap_queue.c: In function 'ap_sm_recv': ./include/linux/list.h:577:2: warning: this statement may fall through [-Wimplicit-fallthrough=] 577 | for (pos = list_first_entry(head, typeof(*pos), member); \ | ^~~ drivers/s390/crypto/ap_queue.c:147:3: note: in expansion of macro 'list_for_each_entry' 147 | list_for_each_entry(ap_msg, &aq->pendingq, list) { | ^~~~~~~~~~~~~~~~~~~ drivers/s390/crypto/ap_queue.c:155:2: note: here 155 | case AP_RESPONSE_NO_PENDING_REPLY: | ^~~~ drivers/s390/crypto/zcrypt_msgtype6.c: In function 'convert_response_ep11_xcrb': drivers/s390/crypto/zcrypt_msgtype6.c:871:6: warning: this statement may fall through [-Wimplicit-fallthrough=] 871 | if (msg->cprbx.cprb_ver_id == 0x04) | ^ drivers/s390/crypto/zcrypt_msgtype6.c:874:2: note: here 874 | default: /* Unknown response type, this should NEVER EVER happen */ | ^~~~~~~ drivers/s390/crypto/zcrypt_msgtype6.c: In function 'convert_response_rng': drivers/s390/crypto/zcrypt_msgtype6.c:901:6: warning: this statement may fall through [-Wimplicit-fallthrough=] 901 | if (msg->cprbx.cprb_ver_id == 0x02) | ^ drivers/s390/crypto/zcrypt_msgtype6.c:907:2: note: here 907 | default: /* Unknown response type, this should NEVER EVER happen */ | ^~~~~~~ drivers/s390/crypto/zcrypt_msgtype6.c: In function 'convert_response_xcrb': drivers/s390/crypto/zcrypt_msgtype6.c:838:6: warning: this statement may fall through [-Wimplicit-fallthrough=] 838 | if (msg->cprbx.cprb_ver_id == 0x02) | ^ drivers/s390/crypto/zcrypt_msgtype6.c:844:2: note: here 844 | default: /* Unknown response type, this should NEVER EVER happen */ | ^~~~~~~ drivers/s390/crypto/zcrypt_msgtype6.c: In function 'convert_response_ica': drivers/s390/crypto/zcrypt_msgtype6.c:801:6: warning: this statement may fall through [-Wimplicit-fallthrough=] 801 | if (msg->cprbx.cprb_ver_id == 0x02) | ^ drivers/s390/crypto/zcrypt_msgtype6.c:808:2: note: here 808 | default: /* Unknown response type, this should NEVER EVER happen */ | ^~~~~~~ Acked-by: Patrick Steuer Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_queue.c | 1 + drivers/s390/crypto/zcrypt_msgtype6.c | 17 ++++------------- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 5ea83dc4f1d7..dad2be333d82 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -152,6 +152,7 @@ static struct ap_queue_status ap_sm_recv(struct ap_queue *aq) ap_msg->receive(aq, ap_msg, aq->reply); break; } + /* fall through */ case AP_RESPONSE_NO_PENDING_REPLY: if (!status.queue_empty || aq->queue_count <= 0) break; diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index 12fe9deb265e..a36251d138fb 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -801,10 +801,7 @@ static int convert_response_ica(struct zcrypt_queue *zq, if (msg->cprbx.cprb_ver_id == 0x02) return convert_type86_ica(zq, reply, outputdata, outputdatalength); - /* - * Fall through, no break, incorrect cprb version is an unknown - * response - */ + /* fall through - wrong cprb version is an unknown response */ default: /* Unknown response type, this should NEVER EVER happen */ zq->online = 0; pr_err("Cryptographic device %02x.%04x failed and was set offline\n", @@ -837,10 +834,7 @@ static int convert_response_xcrb(struct zcrypt_queue *zq, } if (msg->cprbx.cprb_ver_id == 0x02) return convert_type86_xcrb(zq, reply, xcRB); - /* - * Fall through, no break, incorrect cprb version is an unknown - * response - */ + /* fall through - wrong cprb version is an unknown response */ default: /* Unknown response type, this should NEVER EVER happen */ xcRB->status = 0x0008044DL; /* HDD_InvalidParm */ zq->online = 0; @@ -870,7 +864,7 @@ static int convert_response_ep11_xcrb(struct zcrypt_queue *zq, return convert_error(zq, reply); if (msg->cprbx.cprb_ver_id == 0x04) return convert_type86_ep11_xcrb(zq, reply, xcRB); - /* Fall through, no break, incorrect cprb version is an unknown resp.*/ + /* fall through - wrong cprb version is an unknown resp */ default: /* Unknown response type, this should NEVER EVER happen */ zq->online = 0; pr_err("Cryptographic device %02x.%04x failed and was set offline\n", @@ -900,10 +894,7 @@ static int convert_response_rng(struct zcrypt_queue *zq, return -EINVAL; if (msg->cprbx.cprb_ver_id == 0x02) return convert_type86_rng(zq, reply, data); - /* - * Fall through, no break, incorrect cprb version is an unknown - * response - */ + /* fall through - wrong cprb version is an unknown response */ default: /* Unknown response type, this should NEVER EVER happen */ zq->online = 0; pr_err("Cryptographic device %02x.%04x failed and was set offline\n", From 63dc6e63e682cf756ab8c18aa1b85b0efb358dad Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 1 Aug 2019 13:44:58 +0100 Subject: [PATCH 252/277] Revert "drm/vgem: fix cache synchronization on arm/arm64" commit 7e9e5ead55be ("drm/vgem: fix cache synchronization on arm/arm64") broke all of the !llc i915-vgem coherency tests in CI, and left the HW very, very unhappy (which is even more scary). Fixes: 7e9e5ead55be ("drm/vgem: fix cache synchronization on arm/arm64") Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: Rob Clark Cc: Sean Paul Acked-by: Sean Paul Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20190801124458.24949-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/vgem/vgem_drv.c | 130 ++++++++++++-------------------- 1 file changed, 47 insertions(+), 83 deletions(-) diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index fc04803ff403..11a8f99ba18c 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c @@ -47,16 +47,10 @@ static struct vgem_device { struct platform_device *platform; } *vgem_device; -static void sync_and_unpin(struct drm_vgem_gem_object *bo); -static struct page **pin_and_sync(struct drm_vgem_gem_object *bo); - static void vgem_gem_free_object(struct drm_gem_object *obj) { struct drm_vgem_gem_object *vgem_obj = to_vgem_bo(obj); - if (!obj->import_attach) - sync_and_unpin(vgem_obj); - kvfree(vgem_obj->pages); mutex_destroy(&vgem_obj->pages_lock); @@ -84,15 +78,40 @@ static vm_fault_t vgem_gem_fault(struct vm_fault *vmf) return VM_FAULT_SIGBUS; mutex_lock(&obj->pages_lock); - if (!obj->pages) - pin_and_sync(obj); if (obj->pages) { get_page(obj->pages[page_offset]); vmf->page = obj->pages[page_offset]; ret = 0; } mutex_unlock(&obj->pages_lock); + if (ret) { + struct page *page; + page = shmem_read_mapping_page( + file_inode(obj->base.filp)->i_mapping, + page_offset); + if (!IS_ERR(page)) { + vmf->page = page; + ret = 0; + } else switch (PTR_ERR(page)) { + case -ENOSPC: + case -ENOMEM: + ret = VM_FAULT_OOM; + break; + case -EBUSY: + ret = VM_FAULT_RETRY; + break; + case -EFAULT: + case -EINVAL: + ret = VM_FAULT_SIGBUS; + break; + default: + WARN_ON(PTR_ERR(page)); + ret = VM_FAULT_SIGBUS; + break; + } + + } return ret; } @@ -258,93 +277,32 @@ static const struct file_operations vgem_driver_fops = { .release = drm_release, }; -/* Called under pages_lock, except in free path (where it can't race): */ -static void sync_and_unpin(struct drm_vgem_gem_object *bo) -{ - struct drm_device *dev = bo->base.dev; - - if (bo->table) { - dma_sync_sg_for_cpu(dev->dev, bo->table->sgl, - bo->table->nents, DMA_BIDIRECTIONAL); - sg_free_table(bo->table); - kfree(bo->table); - bo->table = NULL; - } - - if (bo->pages) { - drm_gem_put_pages(&bo->base, bo->pages, true, true); - bo->pages = NULL; - } -} - -static struct page **pin_and_sync(struct drm_vgem_gem_object *bo) -{ - struct drm_device *dev = bo->base.dev; - int npages = bo->base.size >> PAGE_SHIFT; - struct page **pages; - struct sg_table *sgt; - - WARN_ON(!mutex_is_locked(&bo->pages_lock)); - - pages = drm_gem_get_pages(&bo->base); - if (IS_ERR(pages)) { - bo->pages_pin_count--; - mutex_unlock(&bo->pages_lock); - return pages; - } - - sgt = drm_prime_pages_to_sg(pages, npages); - if (IS_ERR(sgt)) { - dev_err(dev->dev, - "failed to allocate sgt: %ld\n", - PTR_ERR(bo->table)); - drm_gem_put_pages(&bo->base, pages, false, false); - mutex_unlock(&bo->pages_lock); - return ERR_CAST(bo->table); - } - - /* - * Flush the object from the CPU cache so that importers - * can rely on coherent indirect access via the exported - * dma-address. - */ - dma_sync_sg_for_device(dev->dev, sgt->sgl, - sgt->nents, DMA_BIDIRECTIONAL); - - bo->pages = pages; - bo->table = sgt; - - return pages; -} - static struct page **vgem_pin_pages(struct drm_vgem_gem_object *bo) { - struct page **pages; - mutex_lock(&bo->pages_lock); - if (bo->pages_pin_count++ == 0 && !bo->pages) { - pages = pin_and_sync(bo); - } else { - WARN_ON(!bo->pages); - pages = bo->pages; + if (bo->pages_pin_count++ == 0) { + struct page **pages; + + pages = drm_gem_get_pages(&bo->base); + if (IS_ERR(pages)) { + bo->pages_pin_count--; + mutex_unlock(&bo->pages_lock); + return pages; + } + + bo->pages = pages; } mutex_unlock(&bo->pages_lock); - return pages; + return bo->pages; } static void vgem_unpin_pages(struct drm_vgem_gem_object *bo) { - /* - * We shouldn't hit this for imported bo's.. in the import - * case we don't own the scatter-table - */ - WARN_ON(bo->base.import_attach); - mutex_lock(&bo->pages_lock); if (--bo->pages_pin_count == 0) { - WARN_ON(!bo->table); - sync_and_unpin(bo); + drm_gem_put_pages(&bo->base, bo->pages, true, true); + bo->pages = NULL; } mutex_unlock(&bo->pages_lock); } @@ -352,12 +310,18 @@ static void vgem_unpin_pages(struct drm_vgem_gem_object *bo) static int vgem_prime_pin(struct drm_gem_object *obj) { struct drm_vgem_gem_object *bo = to_vgem_bo(obj); + long n_pages = obj->size >> PAGE_SHIFT; struct page **pages; pages = vgem_pin_pages(bo); if (IS_ERR(pages)) return PTR_ERR(pages); + /* Flush the object from the CPU cache so that importers can rely + * on coherent indirect access via the exported dma-address. + */ + drm_clflush_pages(pages, n_pages); + return 0; } From 8493b2a06fc5b77ef5c579dc32b12761f7b7a84c Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Tue, 30 Jul 2019 15:44:07 +0200 Subject: [PATCH 253/277] mtd: rawnand: micron: handle on-die "ECC-off" devices correctly Some devices are not supposed to support on-die ECC but experience shows that internal ECC machinery can actually be enabled through the "SET FEATURE (EFh)" command, even if a read of the "READ ID Parameter Tables" returns that it is not. Currently, the driver checks the "READ ID Parameter" field directly after having enabled the feature. If the check fails it returns immediately but leaves the ECC on. When using buggy chips like MT29F2G08ABAGA and MT29F2G08ABBGA, all future read/program cycles will go through the on-die ECC, confusing the host controller which is supposed to be the one handling correction. To address this in a common way we need to turn off the on-die ECC directly after reading the "READ ID Parameter" and before checking the "ECC status". Cc: stable@vger.kernel.org Fixes: dbc44edbf833 ("mtd: rawnand: micron: Fix on-die ECC detection logic") Signed-off-by: Marco Felsch Reviewed-by: Boris Brezillon Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/nand_micron.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/nand/raw/nand_micron.c b/drivers/mtd/nand/raw/nand_micron.c index 1622d3145587..8ca9fad6e6ad 100644 --- a/drivers/mtd/nand/raw/nand_micron.c +++ b/drivers/mtd/nand/raw/nand_micron.c @@ -390,6 +390,14 @@ static int micron_supports_on_die_ecc(struct nand_chip *chip) (chip->id.data[4] & MICRON_ID_INTERNAL_ECC_MASK) != 0x2) return MICRON_ON_DIE_UNSUPPORTED; + /* + * It seems that there are devices which do not support ECC officially. + * At least the MT29F2G08ABAGA / MT29F2G08ABBGA devices supports + * enabling the ECC feature but don't reflect that to the READ_ID table. + * So we have to guarantee that we disable the ECC feature directly + * after we did the READ_ID table command. Later we can evaluate the + * ECC_ENABLE support. + */ ret = micron_nand_on_die_ecc_setup(chip, true); if (ret) return MICRON_ON_DIE_UNSUPPORTED; @@ -398,13 +406,13 @@ static int micron_supports_on_die_ecc(struct nand_chip *chip) if (ret) return MICRON_ON_DIE_UNSUPPORTED; - if (!(id[4] & MICRON_ID_ECC_ENABLED)) - return MICRON_ON_DIE_UNSUPPORTED; - ret = micron_nand_on_die_ecc_setup(chip, false); if (ret) return MICRON_ON_DIE_UNSUPPORTED; + if (!(id[4] & MICRON_ID_ECC_ENABLED)) + return MICRON_ON_DIE_UNSUPPORTED; + ret = nand_readid_op(chip, 0, id, sizeof(id)); if (ret) return MICRON_ON_DIE_UNSUPPORTED; From 2d75989d2d92b71f3f34f2704ac109897a87319f Mon Sep 17 00:00:00 2001 From: Vignesh Raghavendra Date: Fri, 19 Jul 2019 13:59:12 +0530 Subject: [PATCH 254/277] mtd: hyperbus: Kconfig: Fix HBMC_AM654 dependencies On x86_64, when CONFIG_OF is not disabled: WARNING: unmet direct dependencies detected for MUX_MMIO Depends on [n]: MULTIPLEXER [=y] && (OF [=n] || COMPILE_TEST [=n]) Selected by [y]: - HBMC_AM654 [=y] && MTD [=y] && MTD_HYPERBUS [=y] due to config HBMC_AM654 tristate "HyperBus controller driver for AM65x SoC" select MULTIPLEXER select MUX_MMIO Fix this by making HBMC_AM654 imply MUX_MMIO instead of select so that dependencies are taken care of. MUX_MMIO is optional for functioning of driver. Fixes: b07079f1642c ("mtd: hyperbus: Add driver for TI's HyperBus memory controller") Reported-by: Randy Dunlap Signed-off-by: Vignesh Raghavendra Acked-by: Randy Dunlap # build-tested Signed-off-by: Miquel Raynal --- drivers/mtd/hyperbus/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/hyperbus/Kconfig b/drivers/mtd/hyperbus/Kconfig index cff6bbd226f5..1c691df8eff7 100644 --- a/drivers/mtd/hyperbus/Kconfig +++ b/drivers/mtd/hyperbus/Kconfig @@ -15,7 +15,7 @@ if MTD_HYPERBUS config HBMC_AM654 tristate "HyperBus controller driver for AM65x SoC" select MULTIPLEXER - select MUX_MMIO + imply MUX_MMIO help This is the driver for HyperBus controller on TI's AM65x and other SoCs From 2b372a9685a757a1d3ab30615ef42b2db7c45298 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 31 Jul 2019 10:07:06 +0200 Subject: [PATCH 255/277] mtd: hyperbus: Add hardware dependency to AM654 driver The hbmc-am654 driver is for the TI AM654, which is an ARM64 SoC, so don't propose this driver on other architectures unless build-testing. Fixes: b07079f1642c ("mtd: hyperbus: Add driver for TI's HyperBus memory controller") Signed-off-by: Jean Delvare Cc: Vignesh Raghavendra Cc: Miquel Raynal Signed-off-by: Miquel Raynal --- drivers/mtd/hyperbus/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/hyperbus/Kconfig b/drivers/mtd/hyperbus/Kconfig index 1c691df8eff7..b4e3caf7d799 100644 --- a/drivers/mtd/hyperbus/Kconfig +++ b/drivers/mtd/hyperbus/Kconfig @@ -14,6 +14,7 @@ if MTD_HYPERBUS config HBMC_AM654 tristate "HyperBus controller driver for AM65x SoC" + depends on ARM64 || COMPILE_TEST select MULTIPLEXER imply MUX_MMIO help From 68d8681e97bd1c90259f341c1695af05002070ef Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 2 Aug 2019 21:48:33 -0700 Subject: [PATCH 256/277] kernel/signal.c: fix a kernel-doc markup The kernel-doc parser doesn't handle expressions with %foo*. Instead, when an asterisk should be part of a constant, it uses an alternative notation: `foo*`. Link: http://lkml.kernel.org/r/7f18c2e0b5e39e6b7eb55ddeb043b8b260b49f2d.1563361575.git.mchehab+samsung@kernel.org Signed-off-by: Mauro Carvalho Chehab Cc: Deepa Dinamani Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/signal.c b/kernel/signal.c index 349f5a67f100..e667be6907d7 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -349,7 +349,7 @@ void task_clear_jobctl_pending(struct task_struct *task, unsigned long mask) * @task has %JOBCTL_STOP_PENDING set and is participating in a group stop. * Group stop states are cleared and the group stop count is consumed if * %JOBCTL_STOP_CONSUME was set. If the consumption completes the group - * stop, the appropriate %SIGNAL_* flags are set. + * stop, the appropriate `SIGNAL_*` flags are set. * * CONTEXT: * Must be called with @task->sighand->siglock held. From df9576def004d2cd5beedc00cb6e8901427634b9 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Fri, 2 Aug 2019 21:48:37 -0700 Subject: [PATCH 257/277] Revert "kmemleak: allow to coexist with fault injection" When running ltp's oom test with kmemleak enabled, the below warning was triggerred since kernel detects __GFP_NOFAIL & ~__GFP_DIRECT_RECLAIM is passed in: WARNING: CPU: 105 PID: 2138 at mm/page_alloc.c:4608 __alloc_pages_nodemask+0x1c31/0x1d50 Modules linked in: loop dax_pmem dax_pmem_core ip_tables x_tables xfs virtio_net net_failover virtio_blk failover ata_generic virtio_pci virtio_ring virtio libata CPU: 105 PID: 2138 Comm: oom01 Not tainted 5.2.0-next-20190710+ #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014 RIP: 0010:__alloc_pages_nodemask+0x1c31/0x1d50 ... kmemleak_alloc+0x4e/0xb0 kmem_cache_alloc+0x2a7/0x3e0 mempool_alloc_slab+0x2d/0x40 mempool_alloc+0x118/0x2b0 bio_alloc_bioset+0x19d/0x350 get_swap_bio+0x80/0x230 __swap_writepage+0x5ff/0xb20 The mempool_alloc_slab() clears __GFP_DIRECT_RECLAIM, however kmemleak has __GFP_NOFAIL set all the time due to d9570ee3bd1d4f2 ("kmemleak: allow to coexist with fault injection"). But, it doesn't make any sense to have __GFP_NOFAIL and ~__GFP_DIRECT_RECLAIM specified at the same time. According to the discussion on the mailing list, the commit should be reverted for short term solution. Catalin Marinas would follow up with a better solution for longer term. The failure rate of kmemleak metadata allocation may increase in some circumstances, but this should be expected side effect. Link: http://lkml.kernel.org/r/1563299431-111710-1-git-send-email-yang.shi@linux.alibaba.com Fixes: d9570ee3bd1d4f2 ("kmemleak: allow to coexist with fault injection") Signed-off-by: Yang Shi Suggested-by: Catalin Marinas Acked-by: Michal Hocko Cc: Dmitry Vyukov Cc: David Rientjes Cc: Matthew Wilcox Cc: Qian Cai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kmemleak.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index dbbd518fb6b3..6e9e8cca663e 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -114,7 +114,7 @@ /* GFP bitmask for kmemleak internal allocations */ #define gfp_kmemleak_mask(gfp) (((gfp) & (GFP_KERNEL | GFP_ATOMIC)) | \ __GFP_NORETRY | __GFP_NOMEMALLOC | \ - __GFP_NOWARN | __GFP_NOFAIL) + __GFP_NOWARN) /* scanning area inside a memory block */ struct kmemleak_scan_area { From 7bc36e3ce91471b6377c8eadc0a2f220a2280083 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 2 Aug 2019 21:48:40 -0700 Subject: [PATCH 258/277] ocfs2: remove set but not used variable 'last_hash' Fixes gcc '-Wunused-but-set-variable' warning: fs/ocfs2/xattr.c: In function ocfs2_xattr_bucket_find: fs/ocfs2/xattr.c:3828:6: warning: variable last_hash set but not used [-Wunused-but-set-variable] It's never used and can be removed. Link: http://lkml.kernel.org/r/20190716132110.34836-1-yuehaibing@huawei.com Signed-off-by: YueHaibing Acked-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/xattr.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 385f3aaa2448..90c830e3758e 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -3825,7 +3825,6 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); int low_bucket = 0, bucket, high_bucket; struct ocfs2_xattr_bucket *search; - u32 last_hash; u64 blkno, lower_blkno = 0; search = ocfs2_xattr_bucket_new(inode); @@ -3869,8 +3868,6 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, if (xh->xh_count) xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1]; - last_hash = le32_to_cpu(xe->xe_name_hash); - /* record lower_blkno which may be the insert place. */ lower_blkno = blkno; From fa1e512fac717f34e7c12d7a384c46e90a647392 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Fri, 2 Aug 2019 21:48:44 -0700 Subject: [PATCH 259/277] mm: vmscan: check if mem cgroup is disabled or not before calling memcg slab shrinker Shakeel Butt reported premature oom on kernel with "cgroup_disable=memory" since mem_cgroup_is_root() returns false even though memcg is actually NULL. The drop_caches is also broken. It is because commit aeed1d325d42 ("mm/vmscan.c: generalize shrink_slab() calls in shrink_node()") removed the !memcg check before !mem_cgroup_is_root(). And, surprisingly root memcg is allocated even though memory cgroup is disabled by kernel boot parameter. Add mem_cgroup_disabled() check to make reclaimer work as expected. Link: http://lkml.kernel.org/r/1563385526-20805-1-git-send-email-yang.shi@linux.alibaba.com Fixes: aeed1d325d42 ("mm/vmscan.c: generalize shrink_slab() calls in shrink_node()") Signed-off-by: Yang Shi Reported-by: Shakeel Butt Reviewed-by: Shakeel Butt Reviewed-by: Kirill Tkhai Acked-by: Michal Hocko Cc: Jan Hadrava Cc: Vladimir Davydov Cc: Johannes Weiner Cc: Roman Gushchin Cc: Hugh Dickins Cc: Qian Cai Cc: Kirill A. Shutemov Cc: [4.19+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 44df66a98f2a..dbdc46a84f63 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -699,7 +699,14 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid, unsigned long ret, freed = 0; struct shrinker *shrinker; - if (!mem_cgroup_is_root(memcg)) + /* + * The root memcg might be allocated even though memcg is disabled + * via "cgroup_disable=memory" boot parameter. This could make + * mem_cgroup_is_root() return false, then just run memcg slab + * shrink, but skip global shrink. This may result in premature + * oom. + */ + if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg)) return shrink_slab_memcg(gfp_mask, nid, memcg, priority); if (!down_read_trylock(&shrinker_rwsem)) From ebdf4de5642fb6580b0763158b6b4b791c4d6a4d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 2 Aug 2019 21:48:47 -0700 Subject: [PATCH 260/277] mm: migrate: fix reference check race between __find_get_block() and migration buffer_migrate_page_norefs() can race with bh users in the following way: CPU1 CPU2 buffer_migrate_page_norefs() buffer_migrate_lock_buffers() checks bh refs spin_unlock(&mapping->private_lock) __find_get_block() spin_lock(&mapping->private_lock) grab bh ref spin_unlock(&mapping->private_lock) move page do bh work This can result in various issues like lost updates to buffers (i.e. metadata corruption) or use after free issues for the old page. This patch closes the race by holding mapping->private_lock while the mapping is being moved to a new page. Ordinarily, a reference can be taken outside of the private_lock using the per-cpu BH LRU but the references are checked and the LRU invalidated if necessary. The private_lock is held once the references are known so the buffer lookup slow path will spin on the private_lock. Between the page lock and private_lock, it should be impossible for other references to be acquired and updates to happen during the migration. A user had reported data corruption issues on a distribution kernel with a similar page migration implementation as mainline. The data corruption could not be reproduced with this patch applied. A small number of migration-intensive tests were run and no performance problems were noted. [mgorman@techsingularity.net: Changelog, removed tracing] Link: http://lkml.kernel.org/r/20190718090238.GF24383@techsingularity.net Fixes: 89cb0888ca14 "mm: migrate: provide buffer_migrate_page_norefs()" Signed-off-by: Jan Kara Signed-off-by: Mel Gorman Cc: [5.0+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/migrate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index 8992741f10aa..515718392b24 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -767,12 +767,12 @@ recheck_buffers: } bh = bh->b_this_page; } while (bh != head); - spin_unlock(&mapping->private_lock); if (busy) { if (invalidated) { rc = -EAGAIN; goto unlock_buffers; } + spin_unlock(&mapping->private_lock); invalidate_bh_lrus(); invalidated = true; goto recheck_buffers; @@ -805,6 +805,8 @@ recheck_buffers: rc = MIGRATEPAGE_SUCCESS; unlock_buffers: + if (check_refs) + spin_unlock(&mapping->private_lock); bh = head; do { unlock_buffer(bh); From 670105a25608affe01cb0ccdc2a1f4bd2327172b Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 2 Aug 2019 21:48:51 -0700 Subject: [PATCH 261/277] mm: compaction: avoid 100% CPU usage during compaction when a task is killed "howaboutsynergy" reported via kernel buzilla number 204165 that compact_zone_order was consuming 100% CPU during a stress test for prolonged periods of time. Specifically the following command, which should exit in 10 seconds, was taking an excessive time to finish while the CPU was pegged at 100%. stress -m 220 --vm-bytes 1000000000 --timeout 10 Tracing indicated a pattern as follows stress-3923 [007] 519.106208: mm_compaction_isolate_migratepages: range=(0x70bb80 ~ 0x70bb80) nr_scanned=0 nr_taken=0 stress-3923 [007] 519.106212: mm_compaction_isolate_migratepages: range=(0x70bb80 ~ 0x70bb80) nr_scanned=0 nr_taken=0 stress-3923 [007] 519.106216: mm_compaction_isolate_migratepages: range=(0x70bb80 ~ 0x70bb80) nr_scanned=0 nr_taken=0 stress-3923 [007] 519.106219: mm_compaction_isolate_migratepages: range=(0x70bb80 ~ 0x70bb80) nr_scanned=0 nr_taken=0 stress-3923 [007] 519.106223: mm_compaction_isolate_migratepages: range=(0x70bb80 ~ 0x70bb80) nr_scanned=0 nr_taken=0 stress-3923 [007] 519.106227: mm_compaction_isolate_migratepages: range=(0x70bb80 ~ 0x70bb80) nr_scanned=0 nr_taken=0 stress-3923 [007] 519.106231: mm_compaction_isolate_migratepages: range=(0x70bb80 ~ 0x70bb80) nr_scanned=0 nr_taken=0 stress-3923 [007] 519.106235: mm_compaction_isolate_migratepages: range=(0x70bb80 ~ 0x70bb80) nr_scanned=0 nr_taken=0 stress-3923 [007] 519.106238: mm_compaction_isolate_migratepages: range=(0x70bb80 ~ 0x70bb80) nr_scanned=0 nr_taken=0 stress-3923 [007] 519.106242: mm_compaction_isolate_migratepages: range=(0x70bb80 ~ 0x70bb80) nr_scanned=0 nr_taken=0 Note that compaction is entered in rapid succession while scanning and isolating nothing. The problem is that when a task that is compacting receives a fatal signal, it retries indefinitely instead of exiting while making no progress as a fatal signal is pending. It's not easy to trigger this condition although enabling zswap helps on the basis that the timing is altered. A very small window has to be hit for the problem to occur (signal delivered while compacting and isolating a PFN for migration that is not aligned to SWAP_CLUSTER_MAX). This was reproduced locally -- 16G single socket system, 8G swap, 30% zswap configured, vm-bytes 22000000000 using Colin Kings stress-ng implementation from github running in a loop until the problem hits). Tracing recorded the problem occurring almost 200K times in a short window. With this patch, the problem hit 4 times but the task existed normally instead of consuming CPU. This problem has existed for some time but it was made worse by commit cf66f0700c8f ("mm, compaction: do not consider a need to reschedule as contention"). Before that commit, if the same condition was hit then locks would be quickly contended and compaction would exit that way. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=204165 Link: http://lkml.kernel.org/r/20190718085708.GE24383@techsingularity.net Fixes: cf66f0700c8f ("mm, compaction: do not consider a need to reschedule as contention") Signed-off-by: Mel Gorman Reviewed-by: Vlastimil Babka Cc: [5.1+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/compaction.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index 9e1b9acb116b..952dc2fb24e5 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -842,13 +842,15 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, /* * Periodically drop the lock (if held) regardless of its - * contention, to give chance to IRQs. Abort async compaction - * if contended. + * contention, to give chance to IRQs. Abort completely if + * a fatal signal is pending. */ if (!(low_pfn % SWAP_CLUSTER_MAX) && compact_unlock_should_abort(&pgdat->lru_lock, - flags, &locked, cc)) - break; + flags, &locked, cc)) { + low_pfn = 0; + goto fatal_pending; + } if (!pfn_valid_within(low_pfn)) goto isolate_fail; @@ -1060,6 +1062,7 @@ isolate_abort: trace_mm_compaction_isolate_migratepages(start_pfn, low_pfn, nr_scanned, nr_isolated); +fatal_pending: cc->total_migrate_scanned += nr_scanned; if (nr_isolated) count_compact_events(COMPACTISOLATED, nr_isolated); From ebb6d35a74ce21ce1673b8f404c1039d5a1e7e2d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Aug 2019 21:48:54 -0700 Subject: [PATCH 262/277] kasan: remove clang version check for KASAN_STACK asan-stack mode still uses dangerously large kernel stacks of tens of kilobytes in some drivers, and it does not seem that anyone is working on the clang bug. Turn it off for all clang versions to prevent users from accidentally enabling it once they update to clang-9, and to help automated build testing with clang-9. Link: https://bugs.llvm.org/show_bug.cgi?id=38809 Link: http://lkml.kernel.org/r/20190719200347.2596375-1-arnd@arndb.de Fixes: 6baec880d7a5 ("kasan: turn off asan-stack for clang-8 and earlier") Signed-off-by: Arnd Bergmann Acked-by: Nick Desaulniers Reviewed-by: Mark Brown Reviewed-by: Andrey Ryabinin Cc: Qian Cai Cc: Andrey Konovalov Cc: Vasily Gorbik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.kasan | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 4fafba1a923b..7fa97a8b5717 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -106,7 +106,6 @@ endchoice config KASAN_STACK_ENABLE bool "Enable stack instrumentation (unsafe)" if CC_IS_CLANG && !COMPILE_TEST - default !(CLANG_VERSION < 90000) depends on KASAN help The LLVM stack address sanitizer has a know problem that @@ -115,11 +114,11 @@ config KASAN_STACK_ENABLE Disabling asan-stack makes it safe to run kernels build with clang-8 with KASAN enabled, though it loses some of the functionality. - This feature is always disabled when compile-testing with clang-8 - or earlier to avoid cluttering the output in stack overflow - warnings, but clang-8 users can still enable it for builds without - CONFIG_COMPILE_TEST. On gcc and later clang versions it is - assumed to always be safe to use and enabled by default. + This feature is always disabled when compile-testing with clang + to avoid cluttering the output in stack overflow warnings, + but clang users can still enable it for builds without + CONFIG_COMPILE_TEST. On gcc it is assumed to always be safe + to use and enabled by default. config KASAN_STACK int From af700eaed0564d5d3963a7a51cb0843629d7fe3d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Aug 2019 21:48:58 -0700 Subject: [PATCH 263/277] ubsan: build ubsan.c more conservatively objtool points out several conditions that it does not like, depending on the combination with other configuration options and compiler variants: stack protector: lib/ubsan.o: warning: objtool: __ubsan_handle_type_mismatch()+0xbf: call to __stack_chk_fail() with UACCESS enabled lib/ubsan.o: warning: objtool: __ubsan_handle_type_mismatch_v1()+0xbe: call to __stack_chk_fail() with UACCESS enabled stackleak plugin: lib/ubsan.o: warning: objtool: __ubsan_handle_type_mismatch()+0x4a: call to stackleak_track_stack() with UACCESS enabled lib/ubsan.o: warning: objtool: __ubsan_handle_type_mismatch_v1()+0x4a: call to stackleak_track_stack() with UACCESS enabled kasan: lib/ubsan.o: warning: objtool: __ubsan_handle_type_mismatch()+0x25: call to memcpy() with UACCESS enabled lib/ubsan.o: warning: objtool: __ubsan_handle_type_mismatch_v1()+0x25: call to memcpy() with UACCESS enabled The stackleak and kasan options just need to be disabled for this file as we do for other files already. For the stack protector, we already attempt to disable it, but this fails on clang because the check is mixed with the gcc specific -fno-conserve-stack option. According to Andrey Ryabinin, that option is not even needed, dropping it here fixes the stackprotector issue. Link: http://lkml.kernel.org/r/20190722125139.1335385-1-arnd@arndb.de Link: https://lore.kernel.org/lkml/20190617123109.667090-1-arnd@arndb.de/t/ Link: https://lore.kernel.org/lkml/20190722091050.2188664-1-arnd@arndb.de/t/ Fixes: d08965a27e84 ("x86/uaccess, ubsan: Fix UBSAN vs. SMAP") Signed-off-by: Arnd Bergmann Reviewed-by: Andrey Ryabinin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Kees Cook Cc: Matthew Wilcox Cc: Ard Biesheuvel Cc: Andy Shevchenko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/Makefile b/lib/Makefile index 095601ce371d..29c02a924973 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -279,7 +279,8 @@ obj-$(CONFIG_UCS2_STRING) += ucs2_string.o obj-$(CONFIG_UBSAN) += ubsan.o UBSAN_SANITIZE_ubsan.o := n -CFLAGS_ubsan.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) +KASAN_SANITIZE_ubsan.o := n +CFLAGS_ubsan.o := $(call cc-option, -fno-stack-protector) $(DISABLE_STACKLEAK_PLUGIN) obj-$(CONFIG_SBITMAP) += sbitmap.o From ee38d94a0ad89890b770f6c876263cf9fcbfde84 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Aug 2019 21:49:02 -0700 Subject: [PATCH 264/277] page flags: prioritize kasan bits over last-cpuid ARM64 randdconfig builds regularly run into a build error, especially when NUMA_BALANCING and SPARSEMEM are enabled but not SPARSEMEM_VMEMMAP: #error "KASAN: not enough bits in page flags for tag" The last-cpuid bits are already contitional on the available space, so the result of the calculation is a bit random on whether they were already left out or not. Adding the kasan tag bits before last-cpuid makes it much more likely to end up with a successful build here, and should be reliable for randconfig at least, as long as that does not randomize NR_CPUS or NODES_SHIFT but uses the defaults. In order for the modified check to not trigger in the x86 vdso32 code where all constants are wrong (building with -m32), enclose all the definitions with an #ifdef. [arnd@arndb.de: build fix] Link: http://lkml.kernel.org/r/CAK8P3a3Mno1SWTcuAOT0Wa9VS15pdU6EfnkxLbDpyS55yO04+g@mail.gmail.com Link: http://lkml.kernel.org/r/20190722115520.3743282-1-arnd@arndb.de Link: https://lore.kernel.org/lkml/20190618095347.3850490-1-arnd@arndb.de/ Fixes: 2813b9c02962 ("kasan, mm, arm64: tag non slab memory allocated via pagealloc") Signed-off-by: Arnd Bergmann Signed-off-by: Arnd Bergmann Reviewed-by: Andrey Konovalov Reviewed-by: Andrey Ryabinin Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Will Deacon Cc: Christoph Lameter Cc: Mark Rutland Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/vdso/vdso.h | 1 + include/linux/page-flags-layout.h | 18 +++++++++++------- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/mips/vdso/vdso.h b/arch/mips/vdso/vdso.h index 14b1931be69c..b65b169778e3 100644 --- a/arch/mips/vdso/vdso.h +++ b/arch/mips/vdso/vdso.h @@ -9,6 +9,7 @@ #if _MIPS_SIM != _MIPS_SIM_ABI64 && defined(CONFIG_64BIT) /* Building 32-bit VDSO for the 64-bit kernel. Fake a 32-bit Kconfig. */ +#define BUILD_VDSO32_64 #undef CONFIG_64BIT #define CONFIG_32BIT 1 #ifndef __ASSEMBLY__ diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index 1dda31825ec4..71283739ffd2 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -32,6 +32,7 @@ #endif /* CONFIG_SPARSEMEM */ +#ifndef BUILD_VDSO32_64 /* * page->flags layout: * @@ -76,20 +77,22 @@ #define LAST_CPUPID_SHIFT 0 #endif -#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS +#ifdef CONFIG_KASAN_SW_TAGS +#define KASAN_TAG_WIDTH 8 +#else +#define KASAN_TAG_WIDTH 0 +#endif + +#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT+KASAN_TAG_WIDTH \ + <= BITS_PER_LONG - NR_PAGEFLAGS #define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT #else #define LAST_CPUPID_WIDTH 0 #endif -#ifdef CONFIG_KASAN_SW_TAGS -#define KASAN_TAG_WIDTH 8 #if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH+LAST_CPUPID_WIDTH+KASAN_TAG_WIDTH \ > BITS_PER_LONG - NR_PAGEFLAGS -#error "KASAN: not enough bits in page flags for tag" -#endif -#else -#define KASAN_TAG_WIDTH 0 +#error "Not enough bits in page flags" #endif /* @@ -104,4 +107,5 @@ #define LAST_CPUPID_NOT_IN_PAGE_FLAGS #endif +#endif #endif /* _LINUX_PAGE_FLAGS_LAYOUT */ From 315c69261dd3fa12dbc830d4fa00d1fad98d3b03 Mon Sep 17 00:00:00 2001 From: Paul Wise Date: Fri, 2 Aug 2019 21:49:05 -0700 Subject: [PATCH 265/277] coredump: split pipe command whitespace before expanding template Save the offsets of the start of each argument to avoid having to update pointers to each argument after every corename krealloc and to avoid having to duplicate the memory for the dump command. Executable names containing spaces were previously being expanded from %e or %E and then split in the middle of the filename. This is incorrect behaviour since an argument list can represent arguments with spaces. The splitting could lead to extra arguments being passed to the core dump handler that it might have interpreted as options or ignored completely. Core dump handlers that are not aware of this Linux kernel issue will be using %e or %E without considering that it may be split and so they will be vulnerable to processes with spaces in their names breaking their argument list. If their internals are otherwise well written, such as if they are written in shell but quote arguments, they will work better after this change than before. If they are not well written, then there is a slight chance of breakage depending on the details of the code but they will already be fairly broken by the split filenames. Core dump handlers that are aware of this Linux kernel issue will be placing %e or %E as the last item in their core_pattern and then aggregating all of the remaining arguments into one, separated by spaces. Alternatively they will be obtaining the filename via other methods. Both of these will be compatible with the new arrangement. A side effect from this change is that unknown template types (for example %z) result in an empty argument to the dump handler instead of the argument being dropped. This is a desired change as: It is easier for dump handlers to process empty arguments than dropped ones, especially if they are written in shell or don't pass each template item with a preceding command-line option in order to differentiate between individual template types. Most core_patterns in the wild do not use options so they can confuse different template types (especially numeric ones) if an earlier one gets dropped in old kernels. If the kernel introduces a new template type and a core_pattern uses it, the core dump handler might not expect that the argument can be dropped in old kernels. For example, this can result in security issues when %d is dropped in old kernels. This happened with the corekeeper package in Debian and resulted in the interface between corekeeper and Linux having to be rewritten to use command-line options to differentiate between template types. The core_pattern for most core dump handlers is written by the handler author who would generally not insert unknown template types so this change should be compatible with all the core dump handlers that exist. Link: http://lkml.kernel.org/r/20190528051142.24939-1-pabs3@bonedaddy.net Fixes: 74aadce98605 ("core_pattern: allow passing of arguments to user mode helper when core_pattern is a pipe") Signed-off-by: Paul Wise Reported-by: Jakub Wilk [https://bugs.debian.org/924398] Reported-by: Paul Wise [https://lore.kernel.org/linux-fsdevel/c8b7ecb8508895bf4adb62a748e2ea2c71854597.camel@bonedaddy.net/] Suggested-by: Jakub Wilk Acked-by: Neil Horman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/coredump.c | 44 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index e42e17e55bfd..b1ea7dfbd149 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -187,11 +188,13 @@ put_exe_file: * name into corename, which must have space for at least * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. */ -static int format_corename(struct core_name *cn, struct coredump_params *cprm) +static int format_corename(struct core_name *cn, struct coredump_params *cprm, + size_t **argv, int *argc) { const struct cred *cred = current_cred(); const char *pat_ptr = core_pattern; int ispipe = (*pat_ptr == '|'); + bool was_space = false; int pid_in_pattern = 0; int err = 0; @@ -201,12 +204,35 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) return -ENOMEM; cn->corename[0] = '\0'; - if (ispipe) + if (ispipe) { + int argvs = sizeof(core_pattern) / 2; + (*argv) = kmalloc_array(argvs, sizeof(**argv), GFP_KERNEL); + if (!(*argv)) + return -ENOMEM; + (*argv)[(*argc)++] = 0; ++pat_ptr; + } /* Repeat as long as we have more pattern to process and more output space */ while (*pat_ptr) { + /* + * Split on spaces before doing template expansion so that + * %e and %E don't get split if they have spaces in them + */ + if (ispipe) { + if (isspace(*pat_ptr)) { + was_space = true; + pat_ptr++; + continue; + } else if (was_space) { + was_space = false; + err = cn_printf(cn, "%c", '\0'); + if (err) + return err; + (*argv)[(*argc)++] = cn->used; + } + } if (*pat_ptr != '%') { err = cn_printf(cn, "%c", *pat_ptr++); } else { @@ -546,6 +572,8 @@ void do_coredump(const kernel_siginfo_t *siginfo) struct cred *cred; int retval = 0; int ispipe; + size_t *argv = NULL; + int argc = 0; struct files_struct *displaced; /* require nonrelative corefile path and be extra careful */ bool need_suid_safe = false; @@ -592,9 +620,10 @@ void do_coredump(const kernel_siginfo_t *siginfo) old_cred = override_creds(cred); - ispipe = format_corename(&cn, &cprm); + ispipe = format_corename(&cn, &cprm, &argv, &argc); if (ispipe) { + int argi; int dump_count; char **helper_argv; struct subprocess_info *sub_info; @@ -637,12 +666,16 @@ void do_coredump(const kernel_siginfo_t *siginfo) goto fail_dropcount; } - helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL); + helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv), + GFP_KERNEL); if (!helper_argv) { printk(KERN_WARNING "%s failed to allocate memory\n", __func__); goto fail_dropcount; } + for (argi = 0; argi < argc; argi++) + helper_argv[argi] = cn.corename + argv[argi]; + helper_argv[argi] = NULL; retval = -ENOMEM; sub_info = call_usermodehelper_setup(helper_argv[0], @@ -652,7 +685,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) retval = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC); - argv_free(helper_argv); + kfree(helper_argv); if (retval) { printk(KERN_INFO "Core dump to |%s pipe failed\n", cn.corename); @@ -766,6 +799,7 @@ fail_dropcount: if (ispipe) atomic_dec(&core_dump_count); fail_unlock: + kfree(argv); kfree(cn.corename); coredump_finish(mm, core_dumped); revert_creds(old_cred); From 7b358c6f12dc82364f6d317f8c8f1d794adbc3f5 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Fri, 2 Aug 2019 21:49:08 -0700 Subject: [PATCH 266/277] mm/migrate.c: initialize pud_entry in migrate_vma() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_MIGRATE_VMA_HELPER is enabled, migrate_vma() calls migrate_vma_collect() which initializes a struct mm_walk but didn't initialize mm_walk.pud_entry. (Found by code inspection) Use a C structure initialization to make sure it is set to NULL. Link: http://lkml.kernel.org/r/20190719233225.12243-1-rcampbell@nvidia.com Fixes: 8763cb45ab967 ("mm/migrate: new memory migration helper for use with device memory") Signed-off-by: Ralph Campbell Reviewed-by: John Hubbard Reviewed-by: Andrew Morton Cc: "Jérôme Glisse" Cc: Mel Gorman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/migrate.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 515718392b24..a42858d8e00b 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2340,16 +2340,13 @@ next: static void migrate_vma_collect(struct migrate_vma *migrate) { struct mmu_notifier_range range; - struct mm_walk mm_walk; - - mm_walk.pmd_entry = migrate_vma_collect_pmd; - mm_walk.pte_entry = NULL; - mm_walk.pte_hole = migrate_vma_collect_hole; - mm_walk.hugetlb_entry = NULL; - mm_walk.test_walk = NULL; - mm_walk.vma = migrate->vma; - mm_walk.mm = migrate->vma->vm_mm; - mm_walk.private = migrate; + struct mm_walk mm_walk = { + .pmd_entry = migrate_vma_collect_pmd, + .pte_hole = migrate_vma_collect_hole, + .vma = migrate->vma, + .mm = migrate->vma->vm_mm, + .private = migrate, + }; mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm_walk.mm, migrate->start, From aa4996b3af19f8535177ba21cb7241348a34fb94 Mon Sep 17 00:00:00 2001 From: Weitao Hou Date: Fri, 2 Aug 2019 21:49:12 -0700 Subject: [PATCH 267/277] mm/memory_hotplug.c: remove unneeded return for void function return is unneeded in void function Link: http://lkml.kernel.org/r/20190723130814.21826-1-houweitaoo@gmail.com Signed-off-by: Weitao Hou Reviewed-by: David Hildenbrand Reviewed-by: Oscar Salvador Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 2a9bbddb0e55..c73f09913165 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -132,7 +132,6 @@ static void release_memory_resource(struct resource *res) return; release_resource(res); kfree(res); - return; } #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE @@ -979,7 +978,6 @@ static void rollback_node_hotadd(int nid) arch_refresh_nodedata(nid, NULL); free_percpu(pgdat->per_cpu_nodestats); arch_free_nodedata(pgdat); - return; } From b59b1baab789eacdde809135542e3d4f256f6878 Mon Sep 17 00:00:00 2001 From: Chris Down Date: Fri, 2 Aug 2019 21:49:15 -0700 Subject: [PATCH 268/277] cgroup: kselftest: relax fs_spec checks On my laptop most memcg kselftests were being skipped because it claimed cgroup v2 hierarchy wasn't mounted, but this isn't correct. Instead, it seems current systemd HEAD mounts it with the name "cgroup2" instead of "cgroup": % grep cgroup /proc/mounts cgroup2 /sys/fs/cgroup cgroup2 rw,nosuid,nodev,noexec,relatime,nsdelegate 0 0 I can't think of a reason to need to check fs_spec explicitly since it's arbitrary, so we can just rely on fs_vfstype. After these changes, `make TARGETS=cgroup kselftest` actually runs the cgroup v2 tests in more cases. Link: http://lkml.kernel.org/r/20190723210737.GA487@chrisdown.name Signed-off-by: Chris Down Cc: Johannes Weiner Cc: Tejun Heo Cc: Roman Gushchin Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/cgroup/cgroup_util.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index 4c223266299a..bdb69599c4bd 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -191,8 +191,7 @@ int cg_find_unified_root(char *root, size_t len) strtok(NULL, delim); strtok(NULL, delim); - if (strcmp(fs, "cgroup") == 0 && - strcmp(type, "cgroup2") == 0) { + if (strcmp(type, "cgroup2") == 0) { strncpy(root, mount, len); return 0; } From cbedfe11347fe418621bd188d58a206beb676218 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 2 Aug 2019 21:49:19 -0700 Subject: [PATCH 269/277] asm-generic: fix -Wtype-limits compiler warnings Commit d66acc39c7ce ("bitops: Optimise get_order()") introduced a compilation warning because "rx_frag_size" is an "ushort" while PAGE_SHIFT here is 16. The commit changed the get_order() to be a multi-line macro where compilers insist to check all statements in the macro even when __builtin_constant_p(rx_frag_size) will return false as "rx_frag_size" is a module parameter. In file included from ./arch/powerpc/include/asm/page_64.h:107, from ./arch/powerpc/include/asm/page.h:242, from ./arch/powerpc/include/asm/mmu.h:132, from ./arch/powerpc/include/asm/lppaca.h:47, from ./arch/powerpc/include/asm/paca.h:17, from ./arch/powerpc/include/asm/current.h:13, from ./include/linux/thread_info.h:21, from ./arch/powerpc/include/asm/processor.h:39, from ./include/linux/prefetch.h:15, from drivers/net/ethernet/emulex/benet/be_main.c:14: drivers/net/ethernet/emulex/benet/be_main.c: In function 'be_rx_cqs_create': ./include/asm-generic/getorder.h:54:9: warning: comparison is always true due to limited range of data type [-Wtype-limits] (((n) < (1UL << PAGE_SHIFT)) ? 0 : \ ^ drivers/net/ethernet/emulex/benet/be_main.c:3138:33: note: in expansion of macro 'get_order' adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE; ^~~~~~~~~ Fix it by moving all of this multi-line macro into a proper function, and killing __get_order() off. [akpm@linux-foundation.org: remove __get_order() altogether] [cai@lca.pw: v2] Link: http://lkml.kernel.org/r/1564000166-31428-1-git-send-email-cai@lca.pw Link: http://lkml.kernel.org/r/1563914986-26502-1-git-send-email-cai@lca.pw Fixes: d66acc39c7ce ("bitops: Optimise get_order()") Signed-off-by: Qian Cai Reviewed-by: Nathan Chancellor Cc: David S. Miller Cc: Arnd Bergmann Cc: David Howells Cc: Jakub Jelinek Cc: Nick Desaulniers Cc: Bill Wendling Cc: James Y Knight Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/getorder.h | 50 ++++++++++++++-------------------- 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h index c64bea7a52be..e9f20b813a69 100644 --- a/include/asm-generic/getorder.h +++ b/include/asm-generic/getorder.h @@ -7,24 +7,6 @@ #include #include -/* - * Runtime evaluation of get_order() - */ -static inline __attribute_const__ -int __get_order(unsigned long size) -{ - int order; - - size--; - size >>= PAGE_SHIFT; -#if BITS_PER_LONG == 32 - order = fls(size); -#else - order = fls64(size); -#endif - return order; -} - /** * get_order - Determine the allocation order of a memory size * @size: The size for which to get the order @@ -43,19 +25,27 @@ int __get_order(unsigned long size) * to hold an object of the specified size. * * The result is undefined if the size is 0. - * - * This function may be used to initialise variables with compile time - * evaluations of constants. */ -#define get_order(n) \ -( \ - __builtin_constant_p(n) ? ( \ - ((n) == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \ - (((n) < (1UL << PAGE_SHIFT)) ? 0 : \ - ilog2((n) - 1) - PAGE_SHIFT + 1) \ - ) : \ - __get_order(n) \ -) +static inline __attribute_const__ int get_order(unsigned long size) +{ + if (__builtin_constant_p(size)) { + if (!size) + return BITS_PER_LONG - PAGE_SHIFT; + + if (size < (1UL << PAGE_SHIFT)) + return 0; + + return ilog2((size) - 1) - PAGE_SHIFT + 1; + } + + size--; + size >>= PAGE_SHIFT; +#if BITS_PER_LONG == 32 + return fls(size); +#else + return fls64(size); +#endif +} #endif /* __ASSEMBLY__ */ From 733d1d1a7745113e2b6a1761300e7e26b6eb6009 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 2 Aug 2019 21:49:22 -0700 Subject: [PATCH 270/277] lib/test_meminit.c: use GFP_ATOMIC in RCU critical section kmalloc() shouldn't sleep while in RCU critical section, therefore use GFP_ATOMIC instead of GFP_KERNEL. The bug was spotted by the 0day kernel testing robot. Link: http://lkml.kernel.org/r/20190725121703.210874-1-glider@google.com Fixes: 7e659650cbda ("lib: introduce test_meminit module") Signed-off-by: Alexander Potapenko Reviewed-by: Andrew Morton Reported-by: kernel test robot Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_meminit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_meminit.c b/lib/test_meminit.c index 62d19f270cad..9729f271d150 100644 --- a/lib/test_meminit.c +++ b/lib/test_meminit.c @@ -222,7 +222,7 @@ static int __init do_kmem_cache_size(size_t size, bool want_ctor, * Copy the buffer to check that it's not wiped on * free(). */ - buf_copy = kmalloc(size, GFP_KERNEL); + buf_copy = kmalloc(size, GFP_ATOMIC); if (buf_copy) memcpy(buf_copy, buf, size); From 14c5cebad510c2875ca525f36605b47058769670 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Aug 2019 21:49:26 -0700 Subject: [PATCH 271/277] memremap: move from kernel/ to mm/ memremap.c implements MM functionality for ZONE_DEVICE, so it really should be in the mm/ directory, not the kernel/ one. Link: http://lkml.kernel.org/r/20190722094143.18387-1-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Anshuman Khandual Acked-by: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/Makefile | 1 - mm/Makefile | 1 + {kernel => mm}/memremap.c | 0 3 files changed, 1 insertion(+), 1 deletion(-) rename {kernel => mm}/memremap.c (100%) diff --git a/kernel/Makefile b/kernel/Makefile index a8d923b5481b..ef0d95a190b4 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -111,7 +111,6 @@ obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o obj-$(CONFIG_TORTURE_TEST) += torture.o obj-$(CONFIG_HAS_IOMEM) += iomem.o -obj-$(CONFIG_ZONE_DEVICE) += memremap.o obj-$(CONFIG_RSEQ) += rseq.o obj-$(CONFIG_GCC_PLUGIN_STACKLEAK) += stackleak.o diff --git a/mm/Makefile b/mm/Makefile index 338e528ad436..d0b295c3b764 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -102,5 +102,6 @@ obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o +obj-$(CONFIG_ZONE_DEVICE) += memremap.o obj-$(CONFIG_HMM_MIRROR) += hmm.o obj-$(CONFIG_MEMFD_CREATE) += memfd.o diff --git a/kernel/memremap.c b/mm/memremap.c similarity index 100% rename from kernel/memremap.c rename to mm/memremap.c From 7291edca20215dfdf0eb841881d63753448ef09c Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 2 Aug 2019 21:49:29 -0700 Subject: [PATCH 272/277] drivers/acpi/scan.c: document why we don't need the device_hotplug_lock Let's document why the lock is not needed in acpi_scan_init(), right now this is not really obvious. [akpm@linux-foundation.org: fix tpyo] Link: http://lkml.kernel.org/r/20190731135306.31524-1-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Michal Hocko Acked-by: Rafael J. Wysocki Cc: Oscar Salvador Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/acpi/scan.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 0e28270b0fd8..aad6be5c0af0 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -2204,6 +2204,12 @@ int __init acpi_scan_init(void) acpi_gpe_apply_masked_gpes(); acpi_update_all_gpes(); + /* + * Although we call __add_memory() that is documented to require the + * device_hotplug_lock, it is not necessary here because this is an + * early code when userspace or any other code path cannot trigger + * hotplug/hotunplug operations. + */ mutex_lock(&acpi_scan_lock); /* * Enumerate devices in the ACPI namespace. From 0c5b6c28ed68becb692b43eae5e44d5aa7e160ce Mon Sep 17 00:00:00 2001 From: "M. Vefa Bicakci" Date: Sat, 3 Aug 2019 06:02:12 -0400 Subject: [PATCH 273/277] kconfig: Clear "written" flag to avoid data loss Prior to this commit, starting nconfig, xconfig or gconfig, and saving the .config file more than once caused data loss, where a .config file that contained only comments would be written to disk starting from the second save operation. This bug manifests itself because the SYMBOL_WRITTEN flag is never cleared after the first call to conf_write, and subsequent calls to conf_write then skip all of the configuration symbols due to the SYMBOL_WRITTEN flag being set. This commit resolves this issue by clearing the SYMBOL_WRITTEN flag from all symbols before conf_write returns. Fixes: 8e2442a5f86e ("kconfig: fix missing choice values in auto.conf") Cc: linux-stable # 4.19+ Signed-off-by: M. Vefa Bicakci Signed-off-by: Masahiro Yamada --- scripts/kconfig/confdata.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index 1134892599da..3569d2dec37c 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -848,6 +848,7 @@ int conf_write(const char *name) const char *str; char tmpname[PATH_MAX + 1], oldname[PATH_MAX + 1]; char *env; + int i; bool need_newline = false; if (!name) @@ -930,6 +931,9 @@ next: } fclose(out); + for_all_symbols(i, sym) + sym->flags &= ~SYMBOL_WRITTEN; + if (*tmpname) { if (is_same(name, tmpname)) { conf_message("No change to %s", name); From 4c0d228c3bd498b3119d68eb41a17880f7728993 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 29 Jul 2019 19:56:58 +0200 Subject: [PATCH 274/277] MAINTAINERS: Add Geert as Renesas SoC Co-Maintainer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At the end of the v5.3 upstream kernel development cycle, Simon will be stepping down from his role as Renesas SoC maintainer. Starting with the v5.4 development cycle, Geert is taking over this role. Add Geert as a co-maintainer, and add his git repository and branch. Signed-off-by: Geert Uytterhoeven Reviewed-by: Niklas Söderlund Acked-by: Simon Horman Signed-off-by: Linus Torvalds --- MAINTAINERS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0f5004592ffc..a2c343ee3b2c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2155,10 +2155,12 @@ F: Documentation/devicetree/bindings/arm/realtek.txt ARM/RENESAS ARM64 ARCHITECTURE M: Simon Horman +M: Geert Uytterhoeven M: Magnus Damm L: linux-renesas-soc@vger.kernel.org Q: http://patchwork.kernel.org/project/linux-renesas-soc/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas.git next +T: git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel.git next S: Supported F: arch/arm64/boot/dts/renesas/ F: Documentation/devicetree/bindings/arm/renesas.yaml @@ -2269,10 +2271,12 @@ F: drivers/media/platform/s5p-mfc/ ARM/SHMOBILE ARM ARCHITECTURE M: Simon Horman +M: Geert Uytterhoeven M: Magnus Damm L: linux-renesas-soc@vger.kernel.org Q: http://patchwork.kernel.org/project/linux-renesas-soc/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas.git next +T: git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel.git next S: Supported F: arch/arm/boot/dts/emev2* F: arch/arm/boot/dts/gr-peach* From 1e5ac6300a07ceecfc70a893ebef3352be21e6f8 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Thu, 4 Jul 2019 09:26:15 +0200 Subject: [PATCH 275/277] tpm: Fix null pointer dereference on chip register error path If clk_enable is not defined and chip initialization is canceled code hits null dereference. Easily reproducible with vTPM init fail: swtpm chardev --tpmstate dir=nonexistent_dir --tpm2 --vtpm-proxy BUG: kernel NULL pointer dereference, address: 00000000 ... Call Trace: tpm_chip_start+0x9d/0xa0 [tpm] tpm_chip_register+0x10/0x1a0 [tpm] vtpm_proxy_work+0x11/0x30 [tpm_vtpm_proxy] process_one_work+0x214/0x5a0 worker_thread+0x134/0x3e0 ? process_one_work+0x5a0/0x5a0 kthread+0xd4/0x100 ? process_one_work+0x5a0/0x5a0 ? kthread_park+0x90/0x90 ret_from_fork+0x19/0x24 Fixes: 719b7d81f204 ("tpm: introduce tpm_chip_start() and tpm_chip_stop()") Cc: stable@vger.kernel.org # v5.1+ Signed-off-by: Milan Broz Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/tpm-chip.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index d47ad10a35fe..bf868260f435 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -77,6 +77,18 @@ static int tpm_go_idle(struct tpm_chip *chip) return chip->ops->go_idle(chip); } +static void tpm_clk_enable(struct tpm_chip *chip) +{ + if (chip->ops->clk_enable) + chip->ops->clk_enable(chip, true); +} + +static void tpm_clk_disable(struct tpm_chip *chip) +{ + if (chip->ops->clk_enable) + chip->ops->clk_enable(chip, false); +} + /** * tpm_chip_start() - power on the TPM * @chip: a TPM chip to use @@ -89,13 +101,12 @@ int tpm_chip_start(struct tpm_chip *chip) { int ret; - if (chip->ops->clk_enable) - chip->ops->clk_enable(chip, true); + tpm_clk_enable(chip); if (chip->locality == -1) { ret = tpm_request_locality(chip); if (ret) { - chip->ops->clk_enable(chip, false); + tpm_clk_disable(chip); return ret; } } @@ -103,8 +114,7 @@ int tpm_chip_start(struct tpm_chip *chip) ret = tpm_cmd_ready(chip); if (ret) { tpm_relinquish_locality(chip); - if (chip->ops->clk_enable) - chip->ops->clk_enable(chip, false); + tpm_clk_disable(chip); return ret; } @@ -124,8 +134,7 @@ void tpm_chip_stop(struct tpm_chip *chip) { tpm_go_idle(chip); tpm_relinquish_locality(chip); - if (chip->ops->clk_enable) - chip->ops->clk_enable(chip, false); + tpm_clk_disable(chip); } EXPORT_SYMBOL_GPL(tpm_chip_stop); From fa4f99c05320eb28bf6ba52a9adf64d888da1f9e Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Thu, 11 Jul 2019 12:13:35 -0400 Subject: [PATCH 276/277] tpm: tpm_ibm_vtpm: Fix unallocated banks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The nr_allocated_banks and allocated banks are initialized as part of tpm_chip_register. Currently, this is done as part of auto startup function. However, some drivers, like the ibm vtpm driver, do not run auto startup during initialization. This results in uninitialized memory issue and causes a kernel panic during boot. This patch moves the pcr allocation outside the auto startup function into tpm_chip_register. This ensures that allocated banks are initialized in any case. Fixes: 879b589210a9 ("tpm: retrieve digest size of unknown algorithms with PCR read") Reported-by: Michal Suchanek Signed-off-by: Nayna Jain Reviewed-by: Mimi Zohar Tested-by: Sachin Sant Tested-by: Michal Suchánek Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/tpm-chip.c | 20 ++++++++++++++++++++ drivers/char/tpm/tpm.h | 2 ++ drivers/char/tpm/tpm1-cmd.c | 36 ++++++++++++++++++++++++------------ drivers/char/tpm/tpm2-cmd.c | 6 +----- 4 files changed, 47 insertions(+), 17 deletions(-) diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index bf868260f435..4838c6a9f0f2 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -554,6 +554,20 @@ static int tpm_add_hwrng(struct tpm_chip *chip) return hwrng_register(&chip->hwrng); } +static int tpm_get_pcr_allocation(struct tpm_chip *chip) +{ + int rc; + + rc = (chip->flags & TPM_CHIP_FLAG_TPM2) ? + tpm2_get_pcr_allocation(chip) : + tpm1_get_pcr_allocation(chip); + + if (rc > 0) + return -ENODEV; + + return rc; +} + /* * tpm_chip_register() - create a character device for the TPM chip * @chip: TPM chip to use. @@ -573,6 +587,12 @@ int tpm_chip_register(struct tpm_chip *chip) if (rc) return rc; rc = tpm_auto_startup(chip); + if (rc) { + tpm_chip_stop(chip); + return rc; + } + + rc = tpm_get_pcr_allocation(chip); tpm_chip_stop(chip); if (rc) return rc; diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index e503ffc3aa39..a7fea3e0ca86 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -394,6 +394,7 @@ int tpm1_pcr_read(struct tpm_chip *chip, u32 pcr_idx, u8 *res_buf); ssize_t tpm1_getcap(struct tpm_chip *chip, u32 subcap_id, cap_t *cap, const char *desc, size_t min_cap_length); int tpm1_get_random(struct tpm_chip *chip, u8 *out, size_t max); +int tpm1_get_pcr_allocation(struct tpm_chip *chip); unsigned long tpm_calc_ordinal_duration(struct tpm_chip *chip, u32 ordinal); int tpm_pm_suspend(struct device *dev); int tpm_pm_resume(struct device *dev); @@ -449,6 +450,7 @@ int tpm2_unseal_trusted(struct tpm_chip *chip, ssize_t tpm2_get_tpm_pt(struct tpm_chip *chip, u32 property_id, u32 *value, const char *desc); +ssize_t tpm2_get_pcr_allocation(struct tpm_chip *chip); int tpm2_auto_startup(struct tpm_chip *chip); void tpm2_shutdown(struct tpm_chip *chip, u16 shutdown_type); unsigned long tpm2_calc_ordinal_duration(struct tpm_chip *chip, u32 ordinal); diff --git a/drivers/char/tpm/tpm1-cmd.c b/drivers/char/tpm/tpm1-cmd.c index faacbe1ffa1a..149e953ca369 100644 --- a/drivers/char/tpm/tpm1-cmd.c +++ b/drivers/char/tpm/tpm1-cmd.c @@ -699,18 +699,6 @@ int tpm1_auto_startup(struct tpm_chip *chip) goto out; } - chip->allocated_banks = kcalloc(1, sizeof(*chip->allocated_banks), - GFP_KERNEL); - if (!chip->allocated_banks) { - rc = -ENOMEM; - goto out; - } - - chip->allocated_banks[0].alg_id = TPM_ALG_SHA1; - chip->allocated_banks[0].digest_size = hash_digest_size[HASH_ALGO_SHA1]; - chip->allocated_banks[0].crypto_id = HASH_ALGO_SHA1; - chip->nr_allocated_banks = 1; - return rc; out: if (rc > 0) @@ -779,3 +767,27 @@ int tpm1_pm_suspend(struct tpm_chip *chip, u32 tpm_suspend_pcr) return rc; } +/** + * tpm1_get_pcr_allocation() - initialize the allocated bank + * @chip: TPM chip to use. + * + * The function initializes the SHA1 allocated bank to extend PCR + * + * Return: + * * 0 on success, + * * < 0 on error. + */ +int tpm1_get_pcr_allocation(struct tpm_chip *chip) +{ + chip->allocated_banks = kcalloc(1, sizeof(*chip->allocated_banks), + GFP_KERNEL); + if (!chip->allocated_banks) + return -ENOMEM; + + chip->allocated_banks[0].alg_id = TPM_ALG_SHA1; + chip->allocated_banks[0].digest_size = hash_digest_size[HASH_ALGO_SHA1]; + chip->allocated_banks[0].crypto_id = HASH_ALGO_SHA1; + chip->nr_allocated_banks = 1; + + return 0; +} diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c index d103545e4055..ba9acae83bff 100644 --- a/drivers/char/tpm/tpm2-cmd.c +++ b/drivers/char/tpm/tpm2-cmd.c @@ -840,7 +840,7 @@ struct tpm2_pcr_selection { u8 pcr_select[3]; } __packed; -static ssize_t tpm2_get_pcr_allocation(struct tpm_chip *chip) +ssize_t tpm2_get_pcr_allocation(struct tpm_chip *chip) { struct tpm2_pcr_selection pcr_selection; struct tpm_buf buf; @@ -1040,10 +1040,6 @@ int tpm2_auto_startup(struct tpm_chip *chip) goto out; } - rc = tpm2_get_pcr_allocation(chip); - if (rc) - goto out; - rc = tpm2_get_cc_attrs_tbl(chip); out: From e21a712a9685488f5ce80495b37b9fdbe96c230d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 4 Aug 2019 18:40:12 -0700 Subject: [PATCH 277/277] Linux 5.3-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5ee6f6889869..23cdf1f41364 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 3 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Bobtail Squid # *DOCUMENTATION*